1 // Copyright (c) 2005, 2007, The Android Open Source Project
2 // All rights reserved.
3 // Copyright (C) 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // ---
32 // Author: Sanjay Ghemawat <opensource@google.com>
33 //
34 // A malloc that uses a per-thread cache to satisfy small malloc requests.
35 // (The time for malloc/free of a small object drops from 300 ns to 50 ns.)
36 //
37 // See doc/tcmalloc.html for a high-level
38 // description of how this malloc works.
39 //
40 // SYNCHRONIZATION
41 //  1. The thread-specific lists are accessed without acquiring any locks.
42 //     This is safe because each such list is only accessed by one thread.
43 //  2. We have a lock per central free-list, and hold it while manipulating
44 //     the central free list for a particular size.
45 //  3. The central page allocator is protected by "pageheap_lock".
46 //  4. The pagemap (which maps from page-number to descriptor),
47 //     can be read without holding any locks, and written while holding
48 //     the "pageheap_lock".
49 //  5. To improve performance, a subset of the information one can get
50 //     from the pagemap is cached in a data structure, pagemap_cache_,
51 //     that atomically reads and writes its entries.  This cache can be
52 //     read and written without locking.
53 //
54 //     This multi-threaded access to the pagemap is safe for fairly
55 //     subtle reasons.  We basically assume that when an object X is
56 //     allocated by thread A and deallocated by thread B, there must
57 //     have been appropriate synchronization in the handoff of object
58 //     X from thread A to thread B.  The same logic applies to pagemap_cache_.
59 //
60 // THE PAGEID-TO-SIZECLASS CACHE
61 // Hot PageID-to-sizeclass mappings are held by pagemap_cache_.  If this cache
62 // returns 0 for a particular PageID then that means "no information," not that
63 // the sizeclass is 0.  The cache may have stale information for pages that do
64 // not hold the beginning of any free()'able object.  Staleness is eliminated
65 // in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and
66 // do_memalign() for all other relevant pages.
67 //
68 // TODO: Bias reclamation to larger addresses
69 // TODO: implement mallinfo/mallopt
70 // TODO: Better testing
71 //
72 // 9/28/2003 (new page-level allocator replaces ptmalloc2):
73 // * malloc/free of small objects goes from ~300 ns to ~50 ns.
74 // * allocation of a reasonably complicated struct
75 //   goes from about 1100 ns to about 300 ns.
76 
77 #include "config.h"
78 #include "FastMalloc.h"
79 
80 #include "Assertions.h"
81 #if ENABLE(JSC_MULTIPLE_THREADS)
82 #include <pthread.h>
83 #endif
84 
85 #ifndef NO_TCMALLOC_SAMPLES
86 #ifdef WTF_CHANGES
87 #define NO_TCMALLOC_SAMPLES
88 #endif
89 #endif
90 
91 #if !defined(USE_SYSTEM_MALLOC) && defined(NDEBUG)
92 #define FORCE_SYSTEM_MALLOC 0
93 #else
94 #define FORCE_SYSTEM_MALLOC 1
95 #endif
96 
97 #define TCMALLOC_TRACK_DECOMMITED_SPANS (HAVE(VIRTUALALLOC))
98 
99 #ifndef NDEBUG
100 namespace WTF {
101 
102 #if ENABLE(JSC_MULTIPLE_THREADS)
103 static pthread_key_t isForbiddenKey;
104 static pthread_once_t isForbiddenKeyOnce = PTHREAD_ONCE_INIT;
105 static void initializeIsForbiddenKey()
106 {
107   pthread_key_create(&isForbiddenKey, 0);
108 }
109 
110 static bool isForbidden()
111 {
112     pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
113     return !!pthread_getspecific(isForbiddenKey);
114 }
115 
116 void fastMallocForbid()
117 {
118     pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
119     pthread_setspecific(isForbiddenKey, &isForbiddenKey);
120 }
121 
122 void fastMallocAllow()
123 {
124     pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
125     pthread_setspecific(isForbiddenKey, 0);
126 }
127 
128 #else
129 
130 static bool staticIsForbidden;
131 static bool isForbidden()
132 {
133     return staticIsForbidden;
134 }
135 
136 void fastMallocForbid()
137 {
138     staticIsForbidden = true;
139 }
140 
141 void fastMallocAllow()
142 {
143     staticIsForbidden = false;
144 }
145 #endif // ENABLE(JSC_MULTIPLE_THREADS)
146 
147 } // namespace WTF
148 #endif // NDEBUG
149 
150 #include <string.h>
151 
152 namespace WTF {
153 
154 void* fastZeroedMalloc(size_t n)
155 {
156     void* result = fastMalloc(n);
157     memset(result, 0, n);
158     return result;
159 }
160 
161 void* tryFastZeroedMalloc(size_t n)
162 {
163     void* result = tryFastMalloc(n);
164     if (!result)
165         return 0;
166     memset(result, 0, n);
167     return result;
168 }
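
// Illustrative usage (not part of the original source): tryFastZeroedMalloc()
// returns 0 on failure instead of crashing, so callers are expected to check
// the result, e.g.:
//
//   if (int* counters = static_cast<int*>(tryFastZeroedMalloc(count * sizeof(int)))) {
//       // ... use counters[0 .. count-1], which are already zeroed ...
//       fastFree(counters);
//   }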
169 
170 } // namespace WTF
171 
172 #if FORCE_SYSTEM_MALLOC
173 
174 #include <stdlib.h>
175 #if !PLATFORM(WIN_OS)
176     #include <pthread.h>
177 #else
178     #include "windows.h"
179 #endif
180 
181 namespace WTF {
182 
183 void* tryFastMalloc(size_t n)
184 {
185     ASSERT(!isForbidden());
186     return malloc(n);
187 }
188 
189 void* fastMalloc(size_t n)
190 {
191     ASSERT(!isForbidden());
192     void* result = malloc(n);
193     if (!result)
194         CRASH();
195     return result;
196 }
197 
198 void* tryFastCalloc(size_t n_elements, size_t element_size)
199 {
200     ASSERT(!isForbidden());
201     return calloc(n_elements, element_size);
202 }
203 
204 void* fastCalloc(size_t n_elements, size_t element_size)
205 {
206     ASSERT(!isForbidden());
207     void* result = calloc(n_elements, element_size);
208     if (!result)
209         CRASH();
210     return result;
211 }
212 
213 void fastFree(void* p)
214 {
215     ASSERT(!isForbidden());
216     free(p);
217 }
218 
219 void* tryFastRealloc(void* p, size_t n)
220 {
221     ASSERT(!isForbidden());
222     return realloc(p, n);
223 }
224 
225 void* fastRealloc(void* p, size_t n)
226 {
227     ASSERT(!isForbidden());
228     void* result = realloc(p, n);
229     if (!result)
230         CRASH();
231     return result;
232 }
233 
234 void releaseFastMallocFreeMemory() { }
235 
236 FastMallocStatistics fastMallocStatistics()
237 {
238     FastMallocStatistics statistics = { 0, 0, 0, 0 };
239     return statistics;
240 }
241 
242 } // namespace WTF
243 
244 #if PLATFORM(DARWIN)
245 // This symbol is present in the JavaScriptCore exports file even when FastMalloc is disabled.
246 // It will never be used in this case, so its type and value are less interesting than its presence.
247 extern "C" const int jscore_fastmalloc_introspection = 0;
248 #endif
249 
250 #else // FORCE_SYSTEM_MALLOC
251 
252 #if HAVE(STDINT_H)
253 #include <stdint.h>
254 #elif HAVE(INTTYPES_H)
255 #include <inttypes.h>
256 #else
257 #include <sys/types.h>
258 #endif
259 
260 #include "AlwaysInline.h"
261 #include "Assertions.h"
262 #include "TCPackedCache.h"
263 #include "TCPageMap.h"
264 #include "TCSpinLock.h"
265 #include "TCSystemAlloc.h"
266 #include <algorithm>
267 #include <errno.h>
268 #include <new>
269 #include <pthread.h>
270 #include <stdarg.h>
271 #include <stddef.h>
272 #include <stdio.h>
273 #if COMPILER(MSVC)
274 #ifndef WIN32_LEAN_AND_MEAN
275 #define WIN32_LEAN_AND_MEAN
276 #endif
277 #include <windows.h>
278 #endif
279 
280 #if WTF_CHANGES
281 
282 #if PLATFORM(DARWIN)
283 #include "MallocZoneSupport.h"
284 #include <wtf/HashSet.h>
285 #endif
286 
287 #ifndef PRIuS
288 #define PRIuS "zu"
289 #endif
290 
291 // Calling pthread_getspecific through a global function pointer is faster than a normal
292 // call to the function on Mac OS X, and it's used in performance-critical code. So we
293 // use a function pointer. But that's not necessarily faster on other platforms, and we had
294 // problems with this technique on Windows, so we'll do this only on Mac OS X.
295 #if PLATFORM(DARWIN)
296 static void* (*pthread_getspecific_function_pointer)(pthread_key_t) = pthread_getspecific;
297 #define pthread_getspecific(key) pthread_getspecific_function_pointer(key)
298 #endif
299 
300 #define DEFINE_VARIABLE(type, name, value, meaning) \
301   namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead {  \
302   type FLAGS_##name(value);                                \
303   char FLAGS_no##name;                                                        \
304   }                                                                           \
305   using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name
306 
307 #define DEFINE_int64(name, value, meaning) \
308   DEFINE_VARIABLE(int64_t, name, value, meaning)
309 
310 #define DEFINE_double(name, value, meaning) \
311   DEFINE_VARIABLE(double, name, value, meaning)
312 
313 namespace WTF {
314 
315 #define malloc fastMalloc
316 #define calloc fastCalloc
317 #define free fastFree
318 #define realloc fastRealloc
319 
320 #define MESSAGE LOG_ERROR
321 #define CHECK_CONDITION ASSERT
322 
323 #if PLATFORM(DARWIN)
324 class TCMalloc_PageHeap;
325 class TCMalloc_ThreadCache;
326 class TCMalloc_Central_FreeListPadded;
327 
328 class FastMallocZone {
329 public:
330     static void init();
331 
332     static kern_return_t enumerate(task_t, void*, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t, vm_range_recorder_t);
333     static size_t goodSize(malloc_zone_t*, size_t size) { return size; }
334     static boolean_t check(malloc_zone_t*) { return true; }
335     static void  print(malloc_zone_t*, boolean_t) { }
336     static void log(malloc_zone_t*, void*) { }
337     static void forceLock(malloc_zone_t*) { }
338     static void forceUnlock(malloc_zone_t*) { }
339     static void statistics(malloc_zone_t*, malloc_statistics_t* stats) { memset(stats, 0, sizeof(malloc_statistics_t)); }
340 
341 private:
342     FastMallocZone(TCMalloc_PageHeap*, TCMalloc_ThreadCache**, TCMalloc_Central_FreeListPadded*);
343     static size_t size(malloc_zone_t*, const void*);
344     static void* zoneMalloc(malloc_zone_t*, size_t);
345     static void* zoneCalloc(malloc_zone_t*, size_t numItems, size_t size);
346     static void zoneFree(malloc_zone_t*, void*);
347     static void* zoneRealloc(malloc_zone_t*, void*, size_t);
348     static void* zoneValloc(malloc_zone_t*, size_t) { LOG_ERROR("valloc is not supported"); return 0; }
349     static void zoneDestroy(malloc_zone_t*) { }
350 
351     malloc_zone_t m_zone;
352     TCMalloc_PageHeap* m_pageHeap;
353     TCMalloc_ThreadCache** m_threadHeaps;
354     TCMalloc_Central_FreeListPadded* m_centralCaches;
355 };
356 
357 #endif
358 
359 #endif
360 
361 #ifndef WTF_CHANGES
362 // This #ifdef should almost never be set.  Set NO_TCMALLOC_SAMPLES if
363 // you're porting to a system where you really can't get a stacktrace.
364 #ifdef NO_TCMALLOC_SAMPLES
365 // We use #define so code compiles even if you #include stacktrace.h somehow.
366 # define GetStackTrace(stack, depth, skip)  (0)
367 #else
368 # include <google/stacktrace.h>
369 #endif
370 #endif
371 
372 // Even if we have support for thread-local storage in the compiler
373 // and linker, the OS may not support it.  We need to check that at
374 // runtime.  Right now, we have to keep a manual set of "bad" OSes.
375 #if defined(HAVE_TLS)
376   static bool kernel_supports_tls = false;      // be conservative
377   static inline bool KernelSupportsTLS() {
378     return kernel_supports_tls;
379   }
380 # if !HAVE_DECL_UNAME   // if too old for uname, probably too old for TLS
381     static void CheckIfKernelSupportsTLS() {
382       kernel_supports_tls = false;
383     }
384 # else
385 #   include <sys/utsname.h>    // DECL_UNAME checked for <sys/utsname.h> too
386     static void CheckIfKernelSupportsTLS() {
387       struct utsname buf;
388       if (uname(&buf) != 0) {   // should be impossible
389         MESSAGE("uname failed, assuming no TLS support (errno=%d)\n", errno);
390         kernel_supports_tls = false;
391       } else if (strcasecmp(buf.sysname, "linux") == 0) {
392         // The linux case: the first kernel to support TLS was 2.6.0
393         if (buf.release[0] < '2' && buf.release[1] == '.')    // 0.x or 1.x
394           kernel_supports_tls = false;
395         else if (buf.release[0] == '2' && buf.release[1] == '.' &&
396                  buf.release[2] >= '0' && buf.release[2] < '6' &&
397                  buf.release[3] == '.')                       // 2.0 - 2.5
398           kernel_supports_tls = false;
399         else
400           kernel_supports_tls = true;
401       } else {        // some other kernel, we'll be optimistic
402         kernel_supports_tls = true;
403       }
404       // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG
405     }
406 #  endif  // HAVE_DECL_UNAME
407 #endif    // HAVE_TLS
408 
409 // __THROW is defined in glibc systems.  It means, counter-intuitively,
410 // "This function will never throw an exception."  It's an optional
411 // optimization tool, but we may need to use it to match glibc prototypes.
412 #ifndef __THROW    // I guess we're not on a glibc system
413 # define __THROW   // __THROW is just an optimization, so ok to make it ""
414 #endif
415 
416 //-------------------------------------------------------------------
417 // Configuration
418 //-------------------------------------------------------------------
419 
420 // Not all possible combinations of the following parameters make
421 // sense.  In particular, if kMaxSize increases, you may have to
422 // increase kNumClasses as well.
423 static const size_t kPageShift  = 12;
424 static const size_t kPageSize   = 1 << kPageShift;
425 static const size_t kMaxSize    = 8u * kPageSize;
426 static const size_t kAlignShift = 3;
427 static const size_t kAlignment  = 1 << kAlignShift;
428 static const size_t kNumClasses = 68;
429 
430 // Allocates a big block of memory for the pagemap once we reach more than
431 // 128MB
432 static const size_t kPageMapBigAllocationThreshold = 128 << 20;
433 
434 // Minimum number of pages to fetch from system at a time.  Must be
435 // significantly bigger than kBlockSize to amortize system-call
436 // overhead, and also to reduce external fragmentation.  Also, we
437 // should keep this value big because various incarnations of Linux
438 // have small limits on the number of mmap() regions per
439 // address-space.
440 static const size_t kMinSystemAlloc = 1 << (20 - kPageShift);
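
// Illustrative note (not in the original source): with kPageShift == 12 this
// evaluates to 1 << 8 == 256 pages, i.e. each system allocation is at least
// 256 * 4KB == 1MB.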
441 
442 // Number of objects to move between a per-thread list and a central
443 // list in one shot.  We want this to be not too small so we can
444 // amortize the lock overhead for accessing the central list.  Making
445 // it too big may temporarily cause unnecessary memory wastage in the
446 // per-thread free list until the scavenger cleans up the list.
447 static int num_objects_to_move[kNumClasses];
448 
449 // Maximum length we allow a per-thread free-list to have before we
450 // move objects from it into the corresponding central free-list.  We
451 // want this big to avoid locking the central free-list too often.  It
452 // should not hurt to make this list somewhat big because the
453 // scavenging code will shrink it down when its contents are not in use.
454 static const int kMaxFreeListLength = 256;
455 
456 // Lower and upper bounds on the per-thread cache sizes
457 static const size_t kMinThreadCacheSize = kMaxSize * 2;
458 static const size_t kMaxThreadCacheSize = 2 << 20;
459 
460 // Default bound on the total amount of thread caches
461 static const size_t kDefaultOverallThreadCacheSize = 16 << 20;
462 
463 // For all span-lengths < kMaxPages we keep an exact-size list.
464 // REQUIRED: kMaxPages >= kMinSystemAlloc;
465 static const size_t kMaxPages = kMinSystemAlloc;
466 
467 /* The smallest prime > 2^n */
468 static int primes_list[] = {
469     // Small values might cause high rates of sampling
470     // and hence are commented out.
471     // 2, 5, 11, 17, 37, 67, 131, 257,
472     // 521, 1031, 2053, 4099, 8209, 16411,
473     32771, 65537, 131101, 262147, 524309, 1048583,
474     2097169, 4194319, 8388617, 16777259, 33554467 };
475 
476 // Twice the approximate gap between sampling actions.
477 // I.e., we take one sample approximately once every
478 //      tcmalloc_sample_parameter/2
479 // bytes of allocation, i.e., ~ once every 128KB.
480 // Must be a prime number.
481 #ifdef NO_TCMALLOC_SAMPLES
482 DEFINE_int64(tcmalloc_sample_parameter, 0,
483              "Unused: code is compiled with NO_TCMALLOC_SAMPLES");
484 static size_t sample_period = 0;
485 #else
486 DEFINE_int64(tcmalloc_sample_parameter, 262147,
487          "Twice the approximate gap between sampling actions."
488          " Must be a prime number. Otherwise will be rounded up to a "
489          " larger prime number");
490 static size_t sample_period = 262147;
491 #endif
492 
493 // Protects sample_period above
494 static SpinLock sample_period_lock = SPINLOCK_INITIALIZER;
495 
496 // Parameters for controlling how fast memory is returned to the OS.
497 
498 DEFINE_double(tcmalloc_release_rate, 1,
499               "Rate at which we release unused memory to the system.  "
500               "Zero means we never release memory back to the system.  "
501               "Increase this flag to return memory faster; decrease it "
502               "to return memory slower.  Reasonable rates are in the "
503               "range [0,10]");
504 
505 //-------------------------------------------------------------------
506 // Mapping from size to size_class and vice versa
507 //-------------------------------------------------------------------
508 
509 // Sizes <= 1024 have an alignment >= 8.  So for such sizes we have an
510 // array indexed by ceil(size/8).  Sizes > 1024 have an alignment >= 128.
511 // So for these larger sizes we have an array indexed by ceil(size/128).
512 //
513 // We flatten both logical arrays into one physical array and use
514 // arithmetic to compute an appropriate index.  The constants used by
515 // ClassIndex() were selected to make the flattening work.
516 //
517 // Examples:
518 //   Size       Expression                      Index
519 //   -------------------------------------------------------
520 //   0          (0 + 7) / 8                     0
521 //   1          (1 + 7) / 8                     1
522 //   ...
523 //   1024       (1024 + 7) / 8                  128
524 //   1025       (1025 + 127 + (120<<7)) / 128   129
525 //   ...
526 //   32768      (32768 + 127 + (120<<7)) / 128  376
527 static const size_t kMaxSmallSize = 1024;
528 static const int shift_amount[2] = { 3, 7 };  // For divides by 8 or 128
529 static const int add_amount[2] = { 7, 127 + (120 << 7) };
530 static unsigned char class_array[377];
531 
532 // Compute index of the class_array[] entry for a given size
533 static inline int ClassIndex(size_t s) {
534   const int i = (s > kMaxSmallSize);
535   return static_cast<int>((s + add_amount[i]) >> shift_amount[i]);
536 }
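
// Illustrative only (not part of the original source): a few spot checks of the
// ClassIndex() arithmetic, matching the worked examples in the table above.
// Kept under "#if 0" so it is never compiled into the build.
#if 0
static void ClassIndexExamples() {
  ASSERT(ClassIndex(0) == 0);          // (0 + 7) >> 3
  ASSERT(ClassIndex(1) == 1);          // (1 + 7) >> 3
  ASSERT(ClassIndex(1024) == 128);     // (1024 + 7) >> 3
  ASSERT(ClassIndex(1025) == 129);     // (1025 + 127 + (120 << 7)) >> 7
  ASSERT(ClassIndex(kMaxSize) == 376); // kMaxSize == 32768 with 4K pages
}
#endif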
537 
538 // Mapping from size class to max size storable in that class
539 static size_t class_to_size[kNumClasses];
540 
541 // Mapping from size class to number of pages to allocate at a time
542 static size_t class_to_pages[kNumClasses];
543 
544 // TransferCache is used to cache transfers of num_objects_to_move[size_class]
545 // back and forth between thread caches and the central cache for a given size
546 // class.
547 struct TCEntry {
548   void *head;  // Head of chain of objects.
549   void *tail;  // Tail of chain of objects.
550 };
551 // A central cache freelist can have anywhere from 0 to kNumTransferEntries
552 // slots to put linked list chains into.  To keep memory usage bounded, the total
553 // number of TCEntries across size classes is fixed.  Currently each size
554 // class is initially given one TCEntry which also means that the maximum any
555 // one class can have is kNumClasses.
556 static const int kNumTransferEntries = kNumClasses;
557 
558 // Note: the following only works for "n"s that fit in 32-bits, but
559 // that is fine since we only use it for small sizes.
560 static inline int LgFloor(size_t n) {
561   int log = 0;
562   for (int i = 4; i >= 0; --i) {
563     int shift = (1 << i);
564     size_t x = n >> shift;
565     if (x != 0) {
566       n = x;
567       log += shift;
568     }
569   }
570   ASSERT(n == 1);
571   return log;
572 }
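
// Illustrative trace (not in the original source): LgFloor(4096) tries shifts
// of 16, 8, 4, 2, 1; the non-zero steps are n >>= 8 (log = 8) and n >>= 4
// (log = 12), leaving n == 1, so the result is 12.  LgFloor(1) is 0.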
573 
574 // Some very basic linked list functions for lists that use void* as
575 // storage.
576 
577 static inline void *SLL_Next(void *t) {
578   return *(reinterpret_cast<void**>(t));
579 }
580 
581 static inline void SLL_SetNext(void *t, void *n) {
582   *(reinterpret_cast<void**>(t)) = n;
583 }
584 
585 static inline void SLL_Push(void **list, void *element) {
586   SLL_SetNext(element, *list);
587   *list = element;
588 }
589 
590 static inline void *SLL_Pop(void **list) {
591   void *result = *list;
592   *list = SLL_Next(*list);
593   return result;
594 }
595 
596 
597 // Remove N elements from a linked list to which head points.  head will be
598 // modified to point to the new head.  start and end will point to the first
599 // and last nodes of the range.  Note that end's next pointer will be NULL
600 // after this function is called.
601 static inline void SLL_PopRange(void **head, int N, void **start, void **end) {
602   if (N == 0) {
603     *start = NULL;
604     *end = NULL;
605     return;
606   }
607 
608   void *tmp = *head;
609   for (int i = 1; i < N; ++i) {
610     tmp = SLL_Next(tmp);
611   }
612 
613   *start = *head;
614   *end = tmp;
615   *head = SLL_Next(tmp);
616   // Unlink range from list.
617   SLL_SetNext(tmp, NULL);
618 }
619 
620 static inline void SLL_PushRange(void **head, void *start, void *end) {
621   if (!start) return;
622   SLL_SetNext(end, *head);
623   *head = start;
624 }
625 
626 static inline size_t SLL_Size(void *head) {
627   int count = 0;
628   while (head) {
629     count++;
630     head = SLL_Next(head);
631   }
632   return count;
633 }
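
// Illustrative only (not part of the original source): how the SLL_* helpers
// chain raw memory together.  Each "object" must be at least sizeof(void*)
// bytes because its first word is reused as the next pointer.  Disabled with
// "#if 0" so it is never compiled.
#if 0
static void SLLExample() {
  static char storage[4][16];               // four fake 16-byte objects
  void* list = NULL;
  for (int i = 0; i < 4; ++i)
    SLL_Push(&list, storage[i]);            // list: 3 -> 2 -> 1 -> 0
  ASSERT(SLL_Size(list) == 4);

  void *start, *end;
  SLL_PopRange(&list, 2, &start, &end);     // start/end span objects 3 and 2
  ASSERT(SLL_Size(list) == 2);
  ASSERT(SLL_Next(end) == NULL);

  SLL_PushRange(&list, start, end);         // put them back at the head
  ASSERT(SLL_Size(list) == 4);
}
#endif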
634 
635 // Setup helper functions.
636 
637 static ALWAYS_INLINE size_t SizeClass(size_t size) {
638   return class_array[ClassIndex(size)];
639 }
640 
641 // Get the byte-size for a specified class
642 static ALWAYS_INLINE size_t ByteSizeForClass(size_t cl) {
643   return class_to_size[cl];
644 }
645 static int NumMoveSize(size_t size) {
646   if (size == 0) return 0;
647   // Use approx 64k transfers between thread and central caches.
648   int num = static_cast<int>(64.0 * 1024.0 / size);
649   if (num < 2) num = 2;
650   // Clamp well below kMaxFreeListLength to avoid ping pong between central
651   // and thread caches.
652   if (num > static_cast<int>(0.8 * kMaxFreeListLength))
653     num = static_cast<int>(0.8 * kMaxFreeListLength);
654 
655   // Also, avoid bringing too many objects into small object free
656   // lists.  There are lots of such lists, and if we allow each one to
657   // fetch too many at a time, we end up having to scavenge too often
658   // (especially when there are lots of threads and each thread gets a
659   // small allowance for its thread cache).
660   //
661   // TODO: Make thread cache free list sizes dynamic so that we do not
662   // have to equally divide a fixed resource amongst lots of threads.
663   if (num > 32) num = 32;
664 
665   return num;
666 }
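
// Illustrative values (not in the original source): NumMoveSize(64) starts at
// 64*1024/64 == 1024, is clamped to 0.8 * kMaxFreeListLength == 204, and then
// to the final cap of 32.  NumMoveSize(32768) is 64*1024/32768 == 2, which is
// already the minimum, so 2 is returned.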
667 
668 // Initialize the mapping arrays
669 static void InitSizeClasses() {
670   // Do some sanity checking on add_amount[]/shift_amount[]/class_array[]
671   if (ClassIndex(0) < 0) {
672     MESSAGE("Invalid class index %d for size 0\n", ClassIndex(0));
673     CRASH();
674   }
675   if (static_cast<size_t>(ClassIndex(kMaxSize)) >= sizeof(class_array)) {
676     MESSAGE("Invalid class index %d for kMaxSize\n", ClassIndex(kMaxSize));
677     CRASH();
678   }
679 
680   // Compute the size classes we want to use
681   size_t sc = 1;   // Next size class to assign
682   unsigned char alignshift = kAlignShift;
683   int last_lg = -1;
684   for (size_t size = kAlignment; size <= kMaxSize; size += (1 << alignshift)) {
685     int lg = LgFloor(size);
686     if (lg > last_lg) {
687       // Increase alignment every so often.
688       //
689       // Since we double the alignment every time size doubles and
690       // size >= 128, this means that space wasted due to alignment is
691       // at most 16/128 i.e., 12.5%.  Plus we cap the alignment at 256
692       // bytes, so the space wasted as a percentage starts falling for
693       // sizes > 2K.
694       if ((lg >= 7) && (alignshift < 8)) {
695         alignshift++;
696       }
697       last_lg = lg;
698     }
699 
700     // Allocate enough pages so leftover is less than 1/8 of total.
701     // This bounds wasted space to at most 12.5%.
702     size_t psize = kPageSize;
703     while ((psize % size) > (psize >> 3)) {
704       psize += kPageSize;
705     }
706     const size_t my_pages = psize >> kPageShift;
707 
708     if (sc > 1 && my_pages == class_to_pages[sc-1]) {
709       // See if we can merge this into the previous class without
710       // increasing the fragmentation of the previous class.
711       const size_t my_objects = (my_pages << kPageShift) / size;
712       const size_t prev_objects = (class_to_pages[sc-1] << kPageShift)
713                                   / class_to_size[sc-1];
714       if (my_objects == prev_objects) {
715         // Adjust last class to include this size
716         class_to_size[sc-1] = size;
717         continue;
718       }
719     }
720 
721     // Add new class
722     class_to_pages[sc] = my_pages;
723     class_to_size[sc] = size;
724     sc++;
725   }
726   if (sc != kNumClasses) {
727     MESSAGE("wrong number of size classes: found %" PRIuS " instead of %d\n",
728             sc, int(kNumClasses));
729     CRASH();
730   }
731 
732   // Initialize the mapping arrays
733   int next_size = 0;
734   for (unsigned char c = 1; c < kNumClasses; c++) {
735     const size_t max_size_in_class = class_to_size[c];
736     for (size_t s = next_size; s <= max_size_in_class; s += kAlignment) {
737       class_array[ClassIndex(s)] = c;
738     }
739     next_size = static_cast<int>(max_size_in_class + kAlignment);
740   }
741 
742   // Double-check sizes just to be safe
743   for (size_t size = 0; size <= kMaxSize; size++) {
744     const size_t sc = SizeClass(size);
745     if (sc == 0) {
746       MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size);
747       CRASH();
748     }
749     if (sc > 1 && size <= class_to_size[sc-1]) {
750       MESSAGE("Allocating unnecessarily large class %" PRIuS " for %" PRIuS
751               "\n", sc, size);
752       CRASH();
753     }
754     if (sc >= kNumClasses) {
755       MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size);
756       CRASH();
757     }
758     const size_t s = class_to_size[sc];
759     if (size > s) {
760       MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc);
761       CRASH();
762     }
763     if (s == 0) {
764       MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc);
765       CRASH();
766     }
767   }
768 
769   // Initialize the num_objects_to_move array.
770   for (size_t cl = 1; cl  < kNumClasses; ++cl) {
771     num_objects_to_move[cl] = NumMoveSize(ByteSizeForClass(cl));
772   }
773 
774 #ifndef WTF_CHANGES
775   if (false) {
776     // Dump class sizes and maximum external wastage per size class
777     for (size_t cl = 1; cl  < kNumClasses; ++cl) {
778       const int alloc_size = class_to_pages[cl] << kPageShift;
779       const int alloc_objs = alloc_size / class_to_size[cl];
780       const int min_used = (class_to_size[cl-1] + 1) * alloc_objs;
781       const int max_waste = alloc_size - min_used;
782       MESSAGE("SC %3d [ %8d .. %8d ] from %8d ; %2.0f%% maxwaste\n",
783               int(cl),
784               int(class_to_size[cl-1] + 1),
785               int(class_to_size[cl]),
786               int(class_to_pages[cl] << kPageShift),
787               max_waste * 100.0 / alloc_size
788               );
789     }
790   }
791 #endif
792 }
793 
794 // -------------------------------------------------------------------------
795 // Simple allocator for objects of a specified type.  External locking
796 // is required before accessing one of these objects.
797 // -------------------------------------------------------------------------
798 
799 // Metadata allocator -- keeps stats about how many bytes allocated
800 static uint64_t metadata_system_bytes = 0;
801 static void* MetaDataAlloc(size_t bytes) {
802   void* result = TCMalloc_SystemAlloc(bytes, 0);
803   if (result != NULL) {
804     metadata_system_bytes += bytes;
805   }
806   return result;
807 }
808 
809 template <class T>
810 class PageHeapAllocator {
811  private:
812   // How much to allocate from system at a time
813   static const size_t kAllocIncrement = 32 << 10;
814 
815   // Aligned size of T
816   static const size_t kAlignedSize
817   = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment);
818 
819   // Free area from which to carve new objects
820   char* free_area_;
821   size_t free_avail_;
822 
823   // Free list of already carved objects
824   void* free_list_;
825 
826   // Number of allocated but unfreed objects
827   int inuse_;
828 
829  public:
830   void Init() {
831     ASSERT(kAlignedSize <= kAllocIncrement);
832     inuse_ = 0;
833     free_area_ = NULL;
834     free_avail_ = 0;
835     free_list_ = NULL;
836   }
837 
838   T* New() {
839     // Consult free list
840     void* result;
841     if (free_list_ != NULL) {
842       result = free_list_;
843       free_list_ = *(reinterpret_cast<void**>(result));
844     } else {
845       if (free_avail_ < kAlignedSize) {
846         // Need more room
847         free_area_ = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement));
848         if (free_area_ == NULL) CRASH();
849         free_avail_ = kAllocIncrement;
850       }
851       result = free_area_;
852       free_area_ += kAlignedSize;
853       free_avail_ -= kAlignedSize;
854     }
855     inuse_++;
856     return reinterpret_cast<T*>(result);
857   }
858 
859   void Delete(T* p) {
860     *(reinterpret_cast<void**>(p)) = free_list_;
861     free_list_ = p;
862     inuse_--;
863   }
864 
865   int inuse() const { return inuse_; }
866 };
867 
868 // -------------------------------------------------------------------------
869 // Span - a contiguous run of pages
870 // -------------------------------------------------------------------------
871 
872 // Type that can hold a page number
873 typedef uintptr_t PageID;
874 
875 // Type that can hold the length of a run of pages
876 typedef uintptr_t Length;
877 
878 static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;
879 
880 // Convert byte size into pages.  This won't overflow, but may return
881 // an unreasonably large value if bytes is huge enough.
882 static inline Length pages(size_t bytes) {
883   return (bytes >> kPageShift) +
884       ((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
885 }
886 
887 // Convert a user size into the number of bytes that will actually be
888 // allocated
889 static size_t AllocationSize(size_t bytes) {
890   if (bytes > kMaxSize) {
891     // Large object: we allocate an integral number of pages
892     ASSERT(bytes <= (kMaxValidPages << kPageShift));
893     return pages(bytes) << kPageShift;
894   } else {
895     // Small object: find the size class to which it belongs
896     return ByteSizeForClass(SizeClass(bytes));
897   }
898 }
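
// Illustrative only (not in the original source): with 4K pages, a large
// request such as AllocationSize(40000) rounds up to pages(40000) == 10 pages,
// i.e. 40960 bytes.  A small request (<= kMaxSize) is rounded up to the size
// of its size class, which is computed at runtime by InitSizeClasses().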
899 
900 // Information kept for a span (a contiguous run of pages).
901 struct Span {
902   PageID        start;          // Starting page number
903   Length        length;         // Number of pages in span
904   Span*         next;           // Used when in linked list
905   Span*         prev;           // Used when in linked list
906   void*         objects;        // Linked list of free objects
907   unsigned int  free : 1;       // Is the span free
908 #ifndef NO_TCMALLOC_SAMPLES
909   unsigned int  sample : 1;     // Sampled object?
910 #endif
911   unsigned int  sizeclass : 8;  // Size-class for small objects (or 0)
912   unsigned int  refcount : 11;  // Number of non-free objects
913   bool decommitted : 1;
914 
915 #undef SPAN_HISTORY
916 #ifdef SPAN_HISTORY
917   // For debugging, we can keep a log of events per span
918   int nexthistory;
919   char history[64];
920   int value[64];
921 #endif
922 };
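
// Illustrative note (not in the original source): a Span with start == 0x2468
// and length == 3 describes the three 4K pages covering addresses
// [0x2468000, 0x246b000); any pointer in that range maps back to the span via
// (address >> kPageShift).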
923 
924 #if TCMALLOC_TRACK_DECOMMITED_SPANS
925 #define ASSERT_SPAN_COMMITTED(span) ASSERT(!span->decommitted)
926 #else
927 #define ASSERT_SPAN_COMMITTED(span)
928 #endif
929 
930 #ifdef SPAN_HISTORY
931 void Event(Span* span, char op, int v = 0) {
932   span->history[span->nexthistory] = op;
933   span->value[span->nexthistory] = v;
934   span->nexthistory++;
935   if (span->nexthistory == sizeof(span->history)) span->nexthistory = 0;
936 }
937 #else
938 #define Event(s,o,v) ((void) 0)
939 #endif
940 
941 // Allocator/deallocator for spans
942 static PageHeapAllocator<Span> span_allocator;
943 static Span* NewSpan(PageID p, Length len) {
944   Span* result = span_allocator.New();
945   memset(result, 0, sizeof(*result));
946   result->start = p;
947   result->length = len;
948 #ifdef SPAN_HISTORY
949   result->nexthistory = 0;
950 #endif
951   return result;
952 }
953 
954 static inline void DeleteSpan(Span* span) {
955 #ifndef NDEBUG
956   // In debug mode, trash the contents of deleted Spans
957   memset(span, 0x3f, sizeof(*span));
958 #endif
959   span_allocator.Delete(span);
960 }
961 
962 // -------------------------------------------------------------------------
963 // Doubly linked list of spans.
964 // -------------------------------------------------------------------------
965 
966 static inline void DLL_Init(Span* list) {
967   list->next = list;
968   list->prev = list;
969 }
970 
971 static inline void DLL_Remove(Span* span) {
972   span->prev->next = span->next;
973   span->next->prev = span->prev;
974   span->prev = NULL;
975   span->next = NULL;
976 }
977 
978 static ALWAYS_INLINE bool DLL_IsEmpty(const Span* list) {
979   return list->next == list;
980 }
981 
982 static int DLL_Length(const Span* list) {
983   int result = 0;
984   for (Span* s = list->next; s != list; s = s->next) {
985     result++;
986   }
987   return result;
988 }
989 
990 #if 0 /* Not needed at the moment -- causes compiler warnings if not used */
991 static void DLL_Print(const char* label, const Span* list) {
992   MESSAGE("%-10s %p:", label, list);
993   for (const Span* s = list->next; s != list; s = s->next) {
994     MESSAGE(" <%p,%u,%u>", s, s->start, s->length);
995   }
996   MESSAGE("\n");
997 }
998 #endif
999 
1000 static inline void DLL_Prepend(Span* list, Span* span) {
1001   ASSERT(span->next == NULL);
1002   ASSERT(span->prev == NULL);
1003   span->next = list->next;
1004   span->prev = list;
1005   list->next->prev = span;
1006   list->next = span;
1007 }
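
// Illustrative only (not part of the original source): the span lists are
// circular and use a dummy header node, so an "empty" list points at itself.
// Assumes span_allocator has already been initialized; disabled with "#if 0"
// so it is never compiled.
#if 0
static void DLLExample() {
  Span header;
  DLL_Init(&header);                 // header.next == header.prev == &header
  ASSERT(DLL_IsEmpty(&header));

  Span* span = NewSpan(0x2468, 3);   // next/prev are NULL after NewSpan()
  DLL_Prepend(&header, span);
  ASSERT(DLL_Length(&header) == 1);

  DLL_Remove(span);
  ASSERT(DLL_IsEmpty(&header));
  DeleteSpan(span);
}
#endif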
1008 
1009 // -------------------------------------------------------------------------
1010 // Stack traces kept for sampled allocations
1011 //   The following state is protected by pageheap_lock_.
1012 // -------------------------------------------------------------------------
1013 
1014 // size/depth are made the same size as a pointer so that some generic
1015 // code below can conveniently cast them back and forth to void*.
1016 static const int kMaxStackDepth = 31;
1017 struct StackTrace {
1018   uintptr_t size;          // Size of object
1019   uintptr_t depth;         // Number of PC values stored in array below
1020   void*     stack[kMaxStackDepth];
1021 };
1022 static PageHeapAllocator<StackTrace> stacktrace_allocator;
1023 static Span sampled_objects;
1024 
1025 // -------------------------------------------------------------------------
1026 // Map from page-id to per-page data
1027 // -------------------------------------------------------------------------
1028 
1029 // We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines.
1030 // We also use a simple one-level cache for hot PageID-to-sizeclass mappings,
1031 // because sometimes the sizeclass is all the information we need.
1032 
1033 // Selector class -- general selector uses 3-level map
1034 template <int BITS> class MapSelector {
1035  public:
1036   typedef TCMalloc_PageMap3<BITS-kPageShift> Type;
1037   typedef PackedCache<BITS, uint64_t> CacheType;
1038 };
1039 
1040 #if defined(WTF_CHANGES)
1041 #if PLATFORM(X86_64)
1042 // On all known X86-64 platforms, the upper 16 bits are always unused and therefore
1043 // can be excluded from the PageMap key.
1044 // See http://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details
1045 
1046 static const size_t kBitsUnusedOn64Bit = 16;
1047 #else
1048 static const size_t kBitsUnusedOn64Bit = 0;
1049 #endif
1050 
1051 // A three-level map for 64-bit machines
1052 template <> class MapSelector<64> {
1053  public:
1054   typedef TCMalloc_PageMap3<64 - kPageShift - kBitsUnusedOn64Bit> Type;
1055   typedef PackedCache<64, uint64_t> CacheType;
1056 };
1057 #endif
1058 
1059 // A two-level map for 32-bit machines
1060 template <> class MapSelector<32> {
1061  public:
1062   typedef TCMalloc_PageMap2<32 - kPageShift> Type;
1063   typedef PackedCache<32 - kPageShift, uint16_t> CacheType;
1064 };
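
// Illustrative note (not in the original source): the page heap below picks a
// map via MapSelector<8 * sizeof(uintptr_t)>.  With 4K pages that means a
// two-level map over 32 - 12 == 20 bits of page number on 32-bit builds, and a
// three-level map over 64 - 12 - 16 == 36 bits on x86-64 (where the upper 16
// address bits are unused).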
1065 
1066 // -------------------------------------------------------------------------
1067 // Page-level allocator
1068 //  * Eager coalescing
1069 //
1070 // Heap for page-level allocation.  We allow allocating and freeing a
1071 // contiguous run of pages (called a "span").
1072 // -------------------------------------------------------------------------
1073 
1074 class TCMalloc_PageHeap {
1075  public:
1076   void init();
1077 
1078   // Allocate a run of "n" pages.  Returns zero if out of memory.
1079   Span* New(Length n);
1080 
1081   // Delete the span "[p, p+n-1]".
1082   // REQUIRES: span was returned by earlier call to New() and
1083   //           has not yet been deleted.
1084   void Delete(Span* span);
1085 
1086   // Mark an allocated span as being used for small objects of the
1087   // specified size-class.
1088   // REQUIRES: span was returned by an earlier call to New()
1089   //           and has not yet been deleted.
1090   void RegisterSizeClass(Span* span, size_t sc);
1091 
1092   // Split an allocated span into two spans: one of length "n" pages
1093   // followed by another span of length "span->length - n" pages.
1094   // Modifies "*span" to point to the first span of length "n" pages.
1095   // Returns a pointer to the second span.
1096   //
1097   // REQUIRES: "0 < n < span->length"
1098   // REQUIRES: !span->free
1099   // REQUIRES: span->sizeclass == 0
1100   Span* Split(Span* span, Length n);
1101 
1102   // Return the descriptor for the specified page.
1103   inline Span* GetDescriptor(PageID p) const {
1104     return reinterpret_cast<Span*>(pagemap_.get(p));
1105   }
1106 
1107 #ifdef WTF_CHANGES
1108   inline Span* GetDescriptorEnsureSafe(PageID p)
1109   {
1110       pagemap_.Ensure(p, 1);
1111       return GetDescriptor(p);
1112   }
1113 
1114   size_t ReturnedBytes() const;
1115 #endif
1116 
1117   // Dump state to stderr
1118 #ifndef WTF_CHANGES
1119   void Dump(TCMalloc_Printer* out);
1120 #endif
1121 
1122   // Return number of bytes allocated from system
1123   inline uint64_t SystemBytes() const { return system_bytes_; }
1124 
1125   // Return number of free bytes in heap
1126   uint64_t FreeBytes() const {
1127     return (static_cast<uint64_t>(free_pages_) << kPageShift);
1128   }
1129 
1130   bool Check();
1131   bool CheckList(Span* list, Length min_pages, Length max_pages);
1132 
1133   // Release all pages on the free list for reuse by the OS:
1134   void ReleaseFreePages();
1135 
1136   // Return 0 if we have no information, or else the correct sizeclass for p.
1137   // Reads and writes to pagemap_cache_ do not require locking.
1138   // The entries are 64 bits on 64-bit hardware and 16 bits on
1139   // 32-bit hardware, and we don't mind raciness as long as each read of
1140   // an entry yields a valid entry, not a partially updated entry.
1141   size_t GetSizeClassIfCached(PageID p) const {
1142     return pagemap_cache_.GetOrDefault(p, 0);
1143   }
1144   void CacheSizeClass(PageID p, size_t cl) const { pagemap_cache_.Put(p, cl); }
1145 
1146  private:
1147   // Pick the appropriate map and cache types based on pointer size
1148   typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap;
1149   typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache;
1150   PageMap pagemap_;
1151   mutable PageMapCache pagemap_cache_;
1152 
1153   // We segregate spans of a given size into two circular linked
1154   // lists: one for normal spans, and one for spans whose memory
1155   // has been returned to the system.
1156   struct SpanList {
1157     Span        normal;
1158     Span        returned;
1159   };
1160 
1161   // List of free spans of length >= kMaxPages
1162   SpanList large_;
1163 
1164   // Array mapping from span length to a doubly linked list of free spans
1165   SpanList free_[kMaxPages];
1166 
1167   // Number of pages kept in free lists
1168   uintptr_t free_pages_;
1169 
1170   // Bytes allocated from system
1171   uint64_t system_bytes_;
1172 
1173   bool GrowHeap(Length n);
1174 
1175   // REQUIRES   span->length >= n
1176   // Remove span from its free list, and move any leftover part of
1177   // span into appropriate free lists.  Also update "span" to have
1178   // length exactly "n" and mark it as non-free so it can be returned
1179   // to the client.
1180   //
1181   // "released" is true iff "span" was found on a "returned" list.
1182   void Carve(Span* span, Length n, bool released);
1183 
1184   void RecordSpan(Span* span) {
1185     pagemap_.set(span->start, span);
1186     if (span->length > 1) {
1187       pagemap_.set(span->start + span->length - 1, span);
1188     }
1189   }
1190 
1191   // Allocate a large span of length == n.  If successful, returns a
1192   // span of exactly the specified length.  Else, returns NULL.
1193   Span* AllocLarge(Length n);
1194 
1195   // Incrementally release some memory to the system.
1196   // IncrementalScavenge(n) is called whenever n pages are freed.
1197   void IncrementalScavenge(Length n);
1198 
1199   // Number of pages to deallocate before doing more scavenging
1200   int64_t scavenge_counter_;
1201 
1202   // Index of last free list we scavenged
1203   size_t scavenge_index_;
1204 
1205 #if defined(WTF_CHANGES) && PLATFORM(DARWIN)
1206   friend class FastMallocZone;
1207 #endif
1208 };
1209 
1210 void TCMalloc_PageHeap::init()
1211 {
1212   pagemap_.init(MetaDataAlloc);
1213   pagemap_cache_ = PageMapCache(0);
1214   free_pages_ = 0;
1215   system_bytes_ = 0;
1216   scavenge_counter_ = 0;
1217   // Start scavenging at kMaxPages list
1218   scavenge_index_ = kMaxPages-1;
1219   COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits);
1220   DLL_Init(&large_.normal);
1221   DLL_Init(&large_.returned);
1222   for (size_t i = 0; i < kMaxPages; i++) {
1223     DLL_Init(&free_[i].normal);
1224     DLL_Init(&free_[i].returned);
1225   }
1226 }
1227 
1228 inline Span* TCMalloc_PageHeap::New(Length n) {
1229   ASSERT(Check());
1230   ASSERT(n > 0);
1231 
1232   // Find first size >= n that has a non-empty list
1233   for (Length s = n; s < kMaxPages; s++) {
1234     Span* ll = NULL;
1235     bool released = false;
1236     if (!DLL_IsEmpty(&free_[s].normal)) {
1237       // Found normal span
1238       ll = &free_[s].normal;
1239     } else if (!DLL_IsEmpty(&free_[s].returned)) {
1240       // Found returned span; reallocate it
1241       ll = &free_[s].returned;
1242       released = true;
1243     } else {
1244       // Keep looking in larger classes
1245       continue;
1246     }
1247 
1248     Span* result = ll->next;
1249     Carve(result, n, released);
1250 #if TCMALLOC_TRACK_DECOMMITED_SPANS
1251     if (result->decommitted) {
1252         TCMalloc_SystemCommit(reinterpret_cast<void*>(result->start << kPageShift), static_cast<size_t>(n << kPageShift));
1253         result->decommitted = false;
1254     }
1255 #endif
1256     ASSERT(Check());
1257     free_pages_ -= n;
1258     return result;
1259   }
1260 
1261   Span* result = AllocLarge(n);
1262   if (result != NULL) {
1263       ASSERT_SPAN_COMMITTED(result);
1264       return result;
1265   }
1266 
1267   // Grow the heap and try again
1268   if (!GrowHeap(n)) {
1269     ASSERT(Check());
1270     return NULL;
1271   }
1272 
1273   return AllocLarge(n);
1274 }
1275 
1276 Span* TCMalloc_PageHeap::AllocLarge(Length n) {
1277   // find the best span (closest to n in size).
1278   // The following loops implement address-ordered best-fit.
1279   bool from_released = false;
1280   Span *best = NULL;
1281 
1282   // Search through normal list
1283   for (Span* span = large_.normal.next;
1284        span != &large_.normal;
1285        span = span->next) {
1286     if (span->length >= n) {
1287       if ((best == NULL)
1288           || (span->length < best->length)
1289           || ((span->length == best->length) && (span->start < best->start))) {
1290         best = span;
1291         from_released = false;
1292       }
1293     }
1294   }
1295 
1296   // Search through released list in case it has a better fit
1297   for (Span* span = large_.returned.next;
1298        span != &large_.returned;
1299        span = span->next) {
1300     if (span->length >= n) {
1301       if ((best == NULL)
1302           || (span->length < best->length)
1303           || ((span->length == best->length) && (span->start < best->start))) {
1304         best = span;
1305         from_released = true;
1306       }
1307     }
1308   }
1309 
1310   if (best != NULL) {
1311     Carve(best, n, from_released);
1312 #if TCMALLOC_TRACK_DECOMMITED_SPANS
1313     if (best->decommitted) {
1314         TCMalloc_SystemCommit(reinterpret_cast<void*>(best->start << kPageShift), static_cast<size_t>(n << kPageShift));
1315         best->decommitted = false;
1316     }
1317 #endif
1318     ASSERT(Check());
1319     free_pages_ -= n;
1320     return best;
1321   }
1322   return NULL;
1323 }
1324 
1325 Span* TCMalloc_PageHeap::Split(Span* span, Length n) {
1326   ASSERT(0 < n);
1327   ASSERT(n < span->length);
1328   ASSERT(!span->free);
1329   ASSERT(span->sizeclass == 0);
1330   Event(span, 'T', n);
1331 
1332   const Length extra = span->length - n;
1333   Span* leftover = NewSpan(span->start + n, extra);
1334   Event(leftover, 'U', extra);
1335   RecordSpan(leftover);
1336   pagemap_.set(span->start + n - 1, span); // Update map from pageid to span
1337   span->length = n;
1338 
1339   return leftover;
1340 }
1341 
1342 #if !TCMALLOC_TRACK_DECOMMITED_SPANS
1343 static ALWAYS_INLINE void propagateDecommittedState(Span*, Span*) { }
1344 #else
1345 static ALWAYS_INLINE void propagateDecommittedState(Span* destination, Span* source)
1346 {
1347     destination->decommitted = source->decommitted;
1348 }
1349 #endif
1350 
1351 inline void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) {
1352   ASSERT(n > 0);
1353   DLL_Remove(span);
1354   span->free = 0;
1355   Event(span, 'A', n);
1356 
1357   const int extra = static_cast<int>(span->length - n);
1358   ASSERT(extra >= 0);
1359   if (extra > 0) {
1360     Span* leftover = NewSpan(span->start + n, extra);
1361     leftover->free = 1;
1362     propagateDecommittedState(leftover, span);
1363     Event(leftover, 'S', extra);
1364     RecordSpan(leftover);
1365 
1366     // Place leftover span on appropriate free list
1367     SpanList* listpair = (static_cast<size_t>(extra) < kMaxPages) ? &free_[extra] : &large_;
1368     Span* dst = released ? &listpair->returned : &listpair->normal;
1369     DLL_Prepend(dst, leftover);
1370 
1371     span->length = n;
1372     pagemap_.set(span->start + n - 1, span);
1373   }
1374 }
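
// Illustrative example (not in the original source): if Carve() is asked for
// n == 2 pages out of a free span of 5, the span is trimmed to length 2 and a
// new 3-page leftover span is created, recorded in the pagemap, and prepended
// to free_[3] (its "returned" list if the original span came from one).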
1375 
1376 #if !TCMALLOC_TRACK_DECOMMITED_SPANS
1377 static ALWAYS_INLINE void mergeDecommittedStates(Span*, Span*) { }
1378 #else
1379 static ALWAYS_INLINE void mergeDecommittedStates(Span* destination, Span* other)
1380 {
1381     if (other->decommitted)
1382         destination->decommitted = true;
1383 }
1384 #endif
1385 
1386 inline void TCMalloc_PageHeap::Delete(Span* span) {
1387   ASSERT(Check());
1388   ASSERT(!span->free);
1389   ASSERT(span->length > 0);
1390   ASSERT(GetDescriptor(span->start) == span);
1391   ASSERT(GetDescriptor(span->start + span->length - 1) == span);
1392   span->sizeclass = 0;
1393 #ifndef NO_TCMALLOC_SAMPLES
1394   span->sample = 0;
1395 #endif
1396 
1397   // Coalesce -- we guarantee that "p" != 0, so no bounds checking
1398   // necessary.  We do not bother resetting the stale pagemap
1399   // entries for the pieces we are merging together because we only
1400   // care about the pagemap entries for the boundaries.
1401   //
1402   // Note that the spans we merge into "span" may come out of
1403   // a "returned" list.  For simplicity, we move these into the
1404   // "normal" list of the appropriate size class.
1405   const PageID p = span->start;
1406   const Length n = span->length;
1407   Span* prev = GetDescriptor(p-1);
1408   if (prev != NULL && prev->free) {
1409     // Merge preceding span into this span
1410     ASSERT(prev->start + prev->length == p);
1411     const Length len = prev->length;
1412     mergeDecommittedStates(span, prev);
1413     DLL_Remove(prev);
1414     DeleteSpan(prev);
1415     span->start -= len;
1416     span->length += len;
1417     pagemap_.set(span->start, span);
1418     Event(span, 'L', len);
1419   }
1420   Span* next = GetDescriptor(p+n);
1421   if (next != NULL && next->free) {
1422     // Merge next span into this span
1423     ASSERT(next->start == p+n);
1424     const Length len = next->length;
1425     mergeDecommittedStates(span, next);
1426     DLL_Remove(next);
1427     DeleteSpan(next);
1428     span->length += len;
1429     pagemap_.set(span->start + span->length - 1, span);
1430     Event(span, 'R', len);
1431   }
1432 
1433   Event(span, 'D', span->length);
1434   span->free = 1;
1435   if (span->length < kMaxPages) {
1436     DLL_Prepend(&free_[span->length].normal, span);
1437   } else {
1438     DLL_Prepend(&large_.normal, span);
1439   }
1440   free_pages_ += n;
1441 
1442   IncrementalScavenge(n);
1443   ASSERT(Check());
1444 }
1445 
1446 void TCMalloc_PageHeap::IncrementalScavenge(Length n) {
1447   // Fast path; not yet time to release memory
1448   scavenge_counter_ -= n;
1449   if (scavenge_counter_ >= 0) return;  // Not yet time to scavenge
1450 
1451   // If there is nothing to release, wait for so many pages before
1452   // scavenging again.  With 4K pages, this comes to 1MB of memory.
1453   static const size_t kDefaultReleaseDelay = 1 << 8;
1454 
1455   // Find index of free list to scavenge
1456   size_t index = scavenge_index_ + 1;
1457   for (size_t i = 0; i < kMaxPages+1; i++) {
1458     if (index > kMaxPages) index = 0;
1459     SpanList* slist = (index == kMaxPages) ? &large_ : &free_[index];
1460     if (!DLL_IsEmpty(&slist->normal)) {
1461       // Release the last span on the normal portion of this list
1462       Span* s = slist->normal.prev;
1463       DLL_Remove(s);
1464       TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
1465                              static_cast<size_t>(s->length << kPageShift));
1466 #if TCMALLOC_TRACK_DECOMMITED_SPANS
1467       s->decommitted = true;
1468 #endif
1469       DLL_Prepend(&slist->returned, s);
1470 
1471       scavenge_counter_ = std::max<size_t>(64UL, std::min<size_t>(kDefaultReleaseDelay, kDefaultReleaseDelay - (free_pages_ / kDefaultReleaseDelay)));
1472 
1473       if (index == kMaxPages && !DLL_IsEmpty(&slist->normal))
1474         scavenge_index_ = index - 1;
1475       else
1476         scavenge_index_ = index;
1477       return;
1478     }
1479     index++;
1480   }
1481 
1482   // Nothing to scavenge, delay for a while
1483   scavenge_counter_ = kDefaultReleaseDelay;
1484 }
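
// Illustrative arithmetic (not in the original source): after releasing a span,
// scavenge_counter_ is set to
//   max(64, min(256, 256 - free_pages_ / 256))
// so e.g. with free_pages_ == 10000 the next scavenge happens after another
// 256 - 39 == 217 freed pages, and never sooner than 64.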
1485 
1486 void TCMalloc_PageHeap::RegisterSizeClass(Span* span, size_t sc) {
1487   // Associate span object with all interior pages as well
1488   ASSERT(!span->free);
1489   ASSERT(GetDescriptor(span->start) == span);
1490   ASSERT(GetDescriptor(span->start+span->length-1) == span);
1491   Event(span, 'C', sc);
1492   span->sizeclass = static_cast<unsigned int>(sc);
1493   for (Length i = 1; i < span->length-1; i++) {
1494     pagemap_.set(span->start+i, span);
1495   }
1496 }
1497 
1498 #ifdef WTF_CHANGES
1499 size_t TCMalloc_PageHeap::ReturnedBytes() const {
1500     size_t result = 0;
1501     for (unsigned s = 0; s < kMaxPages; s++) {
1502         const int r_length = DLL_Length(&free_[s].returned);
1503         unsigned r_pages = s * r_length;
1504         result += r_pages << kPageShift;
1505     }
1506 
1507     for (Span* s = large_.returned.next; s != &large_.returned; s = s->next)
1508         result += s->length << kPageShift;
1509     return result;
1510 }
1511 #endif
1512 
1513 #ifndef WTF_CHANGES
1514 static double PagesToMB(uint64_t pages) {
1515   return (pages << kPageShift) / 1048576.0;
1516 }
1517 
1518 void TCMalloc_PageHeap::Dump(TCMalloc_Printer* out) {
1519   int nonempty_sizes = 0;
1520   for (int s = 0; s < kMaxPages; s++) {
1521     if (!DLL_IsEmpty(&free_[s].normal) || !DLL_IsEmpty(&free_[s].returned)) {
1522       nonempty_sizes++;
1523     }
1524   }
1525   out->printf("------------------------------------------------\n");
1526   out->printf("PageHeap: %d sizes; %6.1f MB free\n",
1527               nonempty_sizes, PagesToMB(free_pages_));
1528   out->printf("------------------------------------------------\n");
1529   uint64_t total_normal = 0;
1530   uint64_t total_returned = 0;
1531   for (int s = 0; s < kMaxPages; s++) {
1532     const int n_length = DLL_Length(&free_[s].normal);
1533     const int r_length = DLL_Length(&free_[s].returned);
1534     if (n_length + r_length > 0) {
1535       uint64_t n_pages = s * n_length;
1536       uint64_t r_pages = s * r_length;
1537       total_normal += n_pages;
1538       total_returned += r_pages;
1539       out->printf("%6u pages * %6u spans ~ %6.1f MB; %6.1f MB cum"
1540                   "; unmapped: %6.1f MB; %6.1f MB cum\n",
1541                   s,
1542                   (n_length + r_length),
1543                   PagesToMB(n_pages + r_pages),
1544                   PagesToMB(total_normal + total_returned),
1545                   PagesToMB(r_pages),
1546                   PagesToMB(total_returned));
1547     }
1548   }
1549 
1550   uint64_t n_pages = 0;
1551   uint64_t r_pages = 0;
1552   int n_spans = 0;
1553   int r_spans = 0;
1554   out->printf("Normal large spans:\n");
1555   for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) {
1556     out->printf("   [ %6" PRIuS " pages ] %6.1f MB\n",
1557                 s->length, PagesToMB(s->length));
1558     n_pages += s->length;
1559     n_spans++;
1560   }
1561   out->printf("Unmapped large spans:\n");
1562   for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) {
1563     out->printf("   [ %6" PRIuS " pages ] %6.1f MB\n",
1564                 s->length, PagesToMB(s->length));
1565     r_pages += s->length;
1566     r_spans++;
1567   }
1568   total_normal += n_pages;
1569   total_returned += r_pages;
1570   out->printf(">255   large * %6u spans ~ %6.1f MB; %6.1f MB cum"
1571               "; unmapped: %6.1f MB; %6.1f MB cum\n",
1572               (n_spans + r_spans),
1573               PagesToMB(n_pages + r_pages),
1574               PagesToMB(total_normal + total_returned),
1575               PagesToMB(r_pages),
1576               PagesToMB(total_returned));
1577 }
1578 #endif
1579 
1580 bool TCMalloc_PageHeap::GrowHeap(Length n) {
1581   ASSERT(kMaxPages >= kMinSystemAlloc);
1582   if (n > kMaxValidPages) return false;
1583   Length ask = (n>kMinSystemAlloc) ? n : static_cast<Length>(kMinSystemAlloc);
1584   size_t actual_size;
1585   void* ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
1586   if (ptr == NULL) {
1587     if (n < ask) {
1588       // Try growing just "n" pages
1589       ask = n;
1590       ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
1591     }
1592     if (ptr == NULL) return false;
1593   }
1594   ask = actual_size >> kPageShift;
1595 
1596   uint64_t old_system_bytes = system_bytes_;
1597   system_bytes_ += (ask << kPageShift);
1598   const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
1599   ASSERT(p > 0);
1600 
1601   // If we already have a lot of pages allocated, just preallocate a bunch of
1602   // memory for the page map. This prevents fragmentation by pagemap metadata
1603   // when a program keeps allocating and freeing large blocks.
1604 
1605   if (old_system_bytes < kPageMapBigAllocationThreshold
1606       && system_bytes_ >= kPageMapBigAllocationThreshold) {
1607     pagemap_.PreallocateMoreMemory();
1608   }
1609 
1610   // Make sure pagemap_ has entries for all of the new pages.
1611   // Plus ensure one before and one after so coalescing code
1612   // does not need bounds-checking.
1613   if (pagemap_.Ensure(p-1, ask+2)) {
1614     // Pretend the new area is allocated and then Delete() it to
1615     // cause any necessary coalescing to occur.
1616     //
1617     // We do not adjust free_pages_ here since Delete() will do it for us.
1618     Span* span = NewSpan(p, ask);
1619     RecordSpan(span);
1620     Delete(span);
1621     ASSERT(Check());
1622     return true;
1623   } else {
1624     // We could not allocate memory within "pagemap_"
1625     // TODO: Once we can return memory to the system, return the new span
1626     return false;
1627   }
1628 }
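// Worked sketch of the growth path above (illustrative numbers only; the real
// kMinSystemAlloc is defined earlier in this file): if kMinSystemAlloc were 16
// pages, a request for n == 3 pages would ask the system for 16 pages, and
// "ask = actual_size >> kPageShift" converts whatever the system actually
// granted back into whole pages.  Ensure(p - 1, ask + 2) also maps pagemap
// entries for the page just before and just after the new run, which is what
// lets the coalescing in Delete() examine both neighbours without bounds
// checks.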
1629 
1630 bool TCMalloc_PageHeap::Check() {
1631   ASSERT(free_[0].normal.next == &free_[0].normal);
1632   ASSERT(free_[0].returned.next == &free_[0].returned);
1633   CheckList(&large_.normal, kMaxPages, 1000000000);
1634   CheckList(&large_.returned, kMaxPages, 1000000000);
1635   for (Length s = 1; s < kMaxPages; s++) {
1636     CheckList(&free_[s].normal, s, s);
1637     CheckList(&free_[s].returned, s, s);
1638   }
1639   return true;
1640 }
1641 
1642 #if ASSERT_DISABLED
1643 bool TCMalloc_PageHeap::CheckList(Span*, Length, Length) {
1644   return true;
1645 }
1646 #else
1647 bool TCMalloc_PageHeap::CheckList(Span* list, Length min_pages, Length max_pages) {
1648   for (Span* s = list->next; s != list; s = s->next) {
1649     CHECK_CONDITION(s->free);
1650     CHECK_CONDITION(s->length >= min_pages);
1651     CHECK_CONDITION(s->length <= max_pages);
1652     CHECK_CONDITION(GetDescriptor(s->start) == s);
1653     CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s);
1654   }
1655   return true;
1656 }
1657 #endif
1658 
1659 static void ReleaseFreeList(Span* list, Span* returned) {
1660   // Walk backwards through list so that when we push these
1661   // spans on the "returned" list, we preserve the order.
1662   while (!DLL_IsEmpty(list)) {
1663     Span* s = list->prev;
1664     DLL_Remove(s);
1665     DLL_Prepend(returned, s);
1666     TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
1667                            static_cast<size_t>(s->length << kPageShift));
1668   }
1669 }
1670 
1671 void TCMalloc_PageHeap::ReleaseFreePages() {
1672   for (Length s = 0; s < kMaxPages; s++) {
1673     ReleaseFreeList(&free_[s].normal, &free_[s].returned);
1674   }
1675   ReleaseFreeList(&large_.normal, &large_.returned);
1676   ASSERT(Check());
1677 }
1678 
1679 //-------------------------------------------------------------------
1680 // Free list
1681 //-------------------------------------------------------------------
1682 
1683 class TCMalloc_ThreadCache_FreeList {
1684  private:
1685   void*    list_;       // Linked list of nodes
1686   uint16_t length_;     // Current length
1687   uint16_t lowater_;    // Low water mark for list length
1688 
1689  public:
1690   void Init() {
1691     list_ = NULL;
1692     length_ = 0;
1693     lowater_ = 0;
1694   }
1695 
1696   // Return current length of list
1697   int length() const {
1698     return length_;
1699   }
1700 
1701   // Is list empty?
1702   bool empty() const {
1703     return list_ == NULL;
1704   }
1705 
1706   // Low-water mark management
1707   int lowwatermark() const { return lowater_; }
1708   void clear_lowwatermark() { lowater_ = length_; }
1709 
1710   ALWAYS_INLINE void Push(void* ptr) {
1711     SLL_Push(&list_, ptr);
1712     length_++;
1713   }
1714 
1715   void PushRange(int N, void *start, void *end) {
1716     SLL_PushRange(&list_, start, end);
1717     length_ = length_ + static_cast<uint16_t>(N);
1718   }
1719 
1720   void PopRange(int N, void **start, void **end) {
1721     SLL_PopRange(&list_, N, start, end);
1722     ASSERT(length_ >= N);
1723     length_ = length_ - static_cast<uint16_t>(N);
1724     if (length_ < lowater_) lowater_ = length_;
1725   }
1726 
1727   ALWAYS_INLINE void* Pop() {
1728     ASSERT(list_ != NULL);
1729     length_--;
1730     if (length_ < lowater_) lowater_ = length_;
1731     return SLL_Pop(&list_);
1732   }
1733 
1734 #ifdef WTF_CHANGES
1735   template <class Finder, class Reader>
1736   void enumerateFreeObjects(Finder& finder, const Reader& reader)
1737   {
1738       for (void* nextObject = list_; nextObject; nextObject = *reader(reinterpret_cast<void**>(nextObject)))
1739           finder.visit(nextObject);
1740   }
1741 #endif
1742 };
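// Minimal usage sketch for the free list above (illustrative only; p1 and p2
// stand for objects already belonging to this size class):
//
//   TCMalloc_ThreadCache_FreeList fl;
//   fl.Init();
//   fl.Push(p1);               // length() == 1
//   fl.Push(p2);               // length() == 2
//   void* q = fl.Pop();        // length() == 1; lowater_ tracks the minimum
//   fl.clear_lowwatermark();   // restart low-water tracking at the new length
//
// The low-water mark is what TCMalloc_ThreadCache::Scavenge() below uses to
// decide how many cached objects can safely be returned to the central cache.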
1743 
1744 //-------------------------------------------------------------------
1745 // Data kept per thread
1746 //-------------------------------------------------------------------
1747 
1748 class TCMalloc_ThreadCache {
1749  private:
1750   typedef TCMalloc_ThreadCache_FreeList FreeList;
1751 #if COMPILER(MSVC)
1752   typedef DWORD ThreadIdentifier;
1753 #else
1754   typedef pthread_t ThreadIdentifier;
1755 #endif
1756 
1757   size_t        size_;                  // Combined size of data
1758   ThreadIdentifier tid_;                // Which thread owns it
1759   bool          in_setspecific_;           // Called pthread_setspecific?
1760   FreeList      list_[kNumClasses];     // Array indexed by size-class
1761 
1762   // We sample allocations, biased by the size of the allocation
1763   uint32_t      rnd_;                   // Cheap random number generator
1764   size_t        bytes_until_sample_;    // Bytes until we sample next
1765 
1766   // Allocate a new heap. REQUIRES: pageheap_lock is held.
1767   static inline TCMalloc_ThreadCache* NewHeap(ThreadIdentifier tid);
1768 
1769   // Use only as pthread thread-specific destructor function.
1770   static void DestroyThreadCache(void* ptr);
1771  public:
1772   // All ThreadCache objects are kept in a linked list (for stats collection)
1773   TCMalloc_ThreadCache* next_;
1774   TCMalloc_ThreadCache* prev_;
1775 
1776   void Init(ThreadIdentifier tid);
1777   void Cleanup();
1778 
1779   // Accessors (mostly just for printing stats)
1780   int freelist_length(size_t cl) const { return list_[cl].length(); }
1781 
1782   // Total byte size in cache
1783   size_t Size() const { return size_; }
1784 
1785   void* Allocate(size_t size);
1786   void Deallocate(void* ptr, size_t size_class);
1787 
1788   void FetchFromCentralCache(size_t cl, size_t allocationSize);
1789   void ReleaseToCentralCache(size_t cl, int N);
1790   void Scavenge();
1791   void Print() const;
1792 
1793   // Record allocation of "k" bytes.  Return true iff allocation
1794   // should be sampled
1795   bool SampleAllocation(size_t k);
1796 
1797   // Pick next sampling point
1798   void PickNextSample(size_t k);
1799 
1800   static void                  InitModule();
1801   static void                  InitTSD();
1802   static TCMalloc_ThreadCache* GetThreadHeap();
1803   static TCMalloc_ThreadCache* GetCache();
1804   static TCMalloc_ThreadCache* GetCacheIfPresent();
1805   static TCMalloc_ThreadCache* CreateCacheIfNecessary();
1806   static void                  DeleteCache(TCMalloc_ThreadCache* heap);
1807   static void                  BecomeIdle();
1808   static void                  RecomputeThreadCacheSize();
1809 
1810 #ifdef WTF_CHANGES
1811   template <class Finder, class Reader>
1812   void enumerateFreeObjects(Finder& finder, const Reader& reader)
1813   {
1814       for (unsigned sizeClass = 0; sizeClass < kNumClasses; sizeClass++)
1815           list_[sizeClass].enumerateFreeObjects(finder, reader);
1816   }
1817 #endif
1818 };
1819 
1820 //-------------------------------------------------------------------
1821 // Data kept per size-class in central cache
1822 //-------------------------------------------------------------------
1823 
1824 class TCMalloc_Central_FreeList {
1825  public:
1826   void Init(size_t cl);
1827 
1828   // These methods all do internal locking.
1829 
1830   // Insert the specified range into the central freelist.  N is the number of
1831   // elements in the range.
1832   void InsertRange(void *start, void *end, int N);
1833 
1834   // Remove a range of elements; the actual number fetched is returned in *N.
1835   void RemoveRange(void **start, void **end, int *N);
1836 
1837   // Returns the number of free objects in cache.
1838   size_t length() {
1839     SpinLockHolder h(&lock_);
1840     return counter_;
1841   }
1842 
1843   // Returns the number of free objects in the transfer cache.
1844   int tc_length() {
1845     SpinLockHolder h(&lock_);
1846     return used_slots_ * num_objects_to_move[size_class_];
1847   }
1848 
1849 #ifdef WTF_CHANGES
1850   template <class Finder, class Reader>
1851   void enumerateFreeObjects(Finder& finder, const Reader& reader, TCMalloc_Central_FreeList* remoteCentralFreeList)
1852   {
1853     for (Span* span = &empty_; span && span != &empty_; span = (span->next ? reader(span->next) : 0))
1854       ASSERT(!span->objects);
1855 
1856     ASSERT(!nonempty_.objects);
1857     static const ptrdiff_t nonemptyOffset = reinterpret_cast<const char*>(&nonempty_) - reinterpret_cast<const char*>(this);
1858 
1859     Span* remoteNonempty = reinterpret_cast<Span*>(reinterpret_cast<char*>(remoteCentralFreeList) + nonemptyOffset);
1860     Span* remoteSpan = nonempty_.next;
1861 
1862     for (Span* span = reader(remoteSpan); span && remoteSpan != remoteNonempty; remoteSpan = span->next, span = (span->next ? reader(span->next) : 0)) {
1863       for (void* nextObject = span->objects; nextObject; nextObject = *reader(reinterpret_cast<void**>(nextObject)))
1864         finder.visit(nextObject);
1865     }
1866   }
1867 #endif
1868 
1869  private:
1870   // REQUIRES: lock_ is held
1871   // Remove object from cache and return.
1872   // Return NULL if no free entries in cache.
1873   void* FetchFromSpans();
1874 
1875   // REQUIRES: lock_ is held
1876   // Remove object from cache and return.  Fetches
1877   // from pageheap if cache is empty.  Only returns
1878   // NULL on allocation failure.
1879   void* FetchFromSpansSafe();
1880 
1881   // REQUIRES: lock_ is held
1882   // Release a linked list of objects to spans.
1883   // May temporarily release lock_.
1884   void ReleaseListToSpans(void *start);
1885 
1886   // REQUIRES: lock_ is held
1887   // Release an object to spans.
1888   // May temporarily release lock_.
1889   void ReleaseToSpans(void* object);
1890 
1891   // REQUIRES: lock_ is held
1892   // Populate cache by fetching from the page heap.
1893   // May temporarily release lock_.
1894   void Populate();
1895 
1896   // REQUIRES: lock_ is held.
1897   // Tries to make room for a TCEntry.  If the cache is full it will try to
1898   // expand it at the cost of some other cache size.  Return false if there is
1899   // no space.
1900   bool MakeCacheSpace();
1901 
1902   // REQUIRES: lock_ for locked_size_class is held.
1903   // Picks a "random" size class to steal TCEntry slot from.  In reality it
1904   // just iterates over the sizeclasses but does so without taking a lock.
1905   // Returns true on success.
1906   // May temporarily lock a "random" size class.
1907   static bool EvictRandomSizeClass(size_t locked_size_class, bool force);
1908 
1909   // REQUIRES: lock_ is *not* held.
1910   // Tries to shrink the cache.  If force is true it will release objects to
1911   // spans if that allows it to shrink the cache.  Return false if it failed to
1912   // shrink the cache.  Decrements cache_size_ on success.
1913   // May temporarily take lock_.  If it takes lock_, the locked_size_class
1914   // lock is released to keep the thread from holding two size class locks
1915   // concurrently, which could lead to a deadlock.
1916   bool ShrinkCache(int locked_size_class, bool force);
1917 
1918   // This lock protects all the data members.  cached_entries and cache_size_
1919   // may be looked at without holding the lock.
1920   SpinLock lock_;
1921 
1922   // We keep linked lists of empty and non-empty spans.
1923   size_t   size_class_;     // My size class
1924   Span     empty_;          // Dummy header for list of empty spans
1925   Span     nonempty_;       // Dummy header for list of non-empty spans
1926   size_t   counter_;        // Number of free objects in cache entry
1927 
1928   // Here we reserve space for TCEntry cache slots.  Since one size class can
1929   // end up getting all of the TCEntry quota in the system, we just preallocate
1930   // a sufficient number of entries here.
1931   TCEntry tc_slots_[kNumTransferEntries];
1932 
1933   // Number of currently used cached entries in tc_slots_.  This variable is
1934   // updated under a lock but can be read without one.
1935   int32_t used_slots_;
1936   // The current number of slots for this size class.  This is an
1937   // adaptive value that is increased if there is lots of traffic
1938   // on a given size class.
1939   int32_t cache_size_;
1940 };
1941 
1942 // Pad each CentralCache object to a multiple of 64 bytes
1943 class TCMalloc_Central_FreeListPadded : public TCMalloc_Central_FreeList {
1944  private:
1945   char pad_[(64 - (sizeof(TCMalloc_Central_FreeList) % 64)) % 64];
1946 };
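// Worked example of the padding arithmetic above (illustrative size only): if
// sizeof(TCMalloc_Central_FreeList) were 200 bytes, pad_ would occupy
// (64 - (200 % 64)) % 64 == 56 bytes, rounding each object up to 256 bytes;
// the outer "% 64" makes the pad zero when the size is already a multiple of
// 64.  Keeping each independently locked central free list on its own cache
// lines limits false sharing between size classes.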
1947 
1948 //-------------------------------------------------------------------
1949 // Global variables
1950 //-------------------------------------------------------------------
1951 
1952 // Central cache -- a collection of free-lists, one per size-class.
1953 // We have a separate lock per free-list to reduce contention.
1954 static TCMalloc_Central_FreeListPadded central_cache[kNumClasses];
1955 
1956 // Page-level allocator
1957 static SpinLock pageheap_lock = SPINLOCK_INITIALIZER;
1958 static void* pageheap_memory[(sizeof(TCMalloc_PageHeap) + sizeof(void*) - 1) / sizeof(void*)];
1959 static bool phinited = false;
1960 
1961 // Avoid extra level of indirection by making "pageheap" be just an alias
1962 // of pageheap_memory.
1963 typedef union {
1964     void* m_memory;
1965     TCMalloc_PageHeap* m_pageHeap;
1966 } PageHeapUnion;
1967 
1968 static inline TCMalloc_PageHeap* getPageHeap()
1969 {
1970     PageHeapUnion u = { &pageheap_memory[0] };
1971     return u.m_pageHeap;
1972 }
1973 
1974 #define pageheap getPageHeap()
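// With the macro above, an expression such as "pageheap->New(n)" elsewhere in
// this file expands to "getPageHeap()->New(n)", so the page heap is always
// reached through the union alias of the static pageheap_memory buffer; the
// object itself is initialized in place by InitModule() via "pageheap->init()"
// further below.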
1975 
1976 // If TLS is available, we also store a copy
1977 // of the per-thread object in a __thread variable
1978 // since __thread variables are faster to read
1979 // than pthread_getspecific().  We still need
1980 // pthread_setspecific() because __thread
1981 // variables provide no way to run cleanup
1982 // code when a thread is destroyed.
1983 #ifdef HAVE_TLS
1984 static __thread TCMalloc_ThreadCache *threadlocal_heap;
1985 #endif
1986 // Thread-specific key.  Initialization here is somewhat tricky
1987 // because some Linux startup code invokes malloc() before it
1988 // is in a good enough state to handle pthread_keycreate().
1989 // Therefore, we use TSD keys only after tsd_inited is set to true.
1990 // Until then, we use a slow path to get the heap object.
1991 static bool tsd_inited = false;
1992 static pthread_key_t heap_key;
1993 #if COMPILER(MSVC)
1994 DWORD tlsIndex = TLS_OUT_OF_INDEXES;
1995 #endif
1996 
1997 static ALWAYS_INLINE void setThreadHeap(TCMalloc_ThreadCache* heap)
1998 {
1999     // still do pthread_setspecific when using MSVC fast TLS to
2000     // benefit from the delete callback.
2001     pthread_setspecific(heap_key, heap);
2002 #if COMPILER(MSVC)
2003     TlsSetValue(tlsIndex, heap);
2004 #endif
2005 }
2006 
2007 // Allocator for thread heaps
2008 static PageHeapAllocator<TCMalloc_ThreadCache> threadheap_allocator;
2009 
2010 // Linked list of heap objects.  Protected by pageheap_lock.
2011 static TCMalloc_ThreadCache* thread_heaps = NULL;
2012 static int thread_heap_count = 0;
2013 
2014 // Overall thread cache size.  Protected by pageheap_lock.
2015 static size_t overall_thread_cache_size = kDefaultOverallThreadCacheSize;
2016 
2017 // Global per-thread cache size.  Writes are protected by
2018 // pageheap_lock.  Reads are done without any locking, which should be
2019 // fine as long as size_t can be written atomically and we don't place
2020 // invariants between this variable and other pieces of state.
2021 static volatile size_t per_thread_cache_size = kMaxThreadCacheSize;
2022 
2023 //-------------------------------------------------------------------
2024 // Central cache implementation
2025 //-------------------------------------------------------------------
2026 
2027 void TCMalloc_Central_FreeList::Init(size_t cl) {
2028   lock_.Init();
2029   size_class_ = cl;
2030   DLL_Init(&empty_);
2031   DLL_Init(&nonempty_);
2032   counter_ = 0;
2033 
2034   cache_size_ = 1;
2035   used_slots_ = 0;
2036   ASSERT(cache_size_ <= kNumTransferEntries);
2037 }
2038 
2039 void TCMalloc_Central_FreeList::ReleaseListToSpans(void* start) {
2040   while (start) {
2041     void *next = SLL_Next(start);
2042     ReleaseToSpans(start);
2043     start = next;
2044   }
2045 }
2046 
2047 ALWAYS_INLINE void TCMalloc_Central_FreeList::ReleaseToSpans(void* object) {
2048   const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift;
2049   Span* span = pageheap->GetDescriptor(p);
2050   ASSERT(span != NULL);
2051   ASSERT(span->refcount > 0);
2052 
2053   // If span is empty, move it to non-empty list
2054   if (span->objects == NULL) {
2055     DLL_Remove(span);
2056     DLL_Prepend(&nonempty_, span);
2057     Event(span, 'N', 0);
2058   }
2059 
2060   // The following check is expensive, so it is disabled by default
2061   if (false) {
2062     // Check that object does not occur in list
2063     int got = 0;
2064     for (void* p = span->objects; p != NULL; p = *((void**) p)) {
2065       ASSERT(p != object);
2066       got++;
2067     }
2068     ASSERT(got + span->refcount ==
2069            (span->length<<kPageShift)/ByteSizeForClass(span->sizeclass));
2070   }
2071 
2072   counter_++;
2073   span->refcount--;
2074   if (span->refcount == 0) {
2075     Event(span, '#', 0);
2076     counter_ -= (span->length<<kPageShift) / ByteSizeForClass(span->sizeclass);
2077     DLL_Remove(span);
2078 
2079     // Release central list lock while operating on pageheap
2080     lock_.Unlock();
2081     {
2082       SpinLockHolder h(&pageheap_lock);
2083       pageheap->Delete(span);
2084     }
2085     lock_.Lock();
2086   } else {
2087     *(reinterpret_cast<void**>(object)) = span->objects;
2088     span->objects = object;
2089   }
2090 }
2091 
2092 ALWAYS_INLINE bool TCMalloc_Central_FreeList::EvictRandomSizeClass(
2093     size_t locked_size_class, bool force) {
2094   static int race_counter = 0;
2095   int t = race_counter++;  // Updated without a lock, but who cares.
2096   if (t >= static_cast<int>(kNumClasses)) {
2097     while (t >= static_cast<int>(kNumClasses)) {
2098       t -= kNumClasses;
2099     }
2100     race_counter = t;
2101   }
2102   ASSERT(t >= 0);
2103   ASSERT(t < static_cast<int>(kNumClasses));
2104   if (t == static_cast<int>(locked_size_class)) return false;
2105   return central_cache[t].ShrinkCache(static_cast<int>(locked_size_class), force);
2106 }
2107 
2108 bool TCMalloc_Central_FreeList::MakeCacheSpace() {
2109   // Is there room in the cache?
2110   if (used_slots_ < cache_size_) return true;
2111   // Can we expand this cache?
2112   if (cache_size_ == kNumTransferEntries) return false;
2113   // Ok, we'll try to grab an entry from some other size class.
2114   if (EvictRandomSizeClass(size_class_, false) ||
2115       EvictRandomSizeClass(size_class_, true)) {
2116     // Succeeded in evicting, we're going to make our cache larger.
2117     cache_size_++;
2118     return true;
2119   }
2120   return false;
2121 }
2122 
2123 
2124 namespace {
2125 class LockInverter {
2126  private:
2127   SpinLock *held_, *temp_;
2128  public:
2129   inline explicit LockInverter(SpinLock* held, SpinLock *temp)
2130     : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); }
2131   inline ~LockInverter() { temp_->Unlock(); held_->Lock();  }
2132 };
2133 }
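// Illustrative timeline of the inversion above, as used by ShrinkCache()
// below: on entry the calling thread holds
// central_cache[locked_size_class].lock_; the LockInverter constructor drops
// that lock and only then takes this cache's lock_, so at most one size-class
// lock is held at any instant; the destructor restores the original lock in
// reverse order on return.  This sidesteps the lack of a well-defined nesting
// order between size-class locks described in the comment below.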
2134 
2135 bool TCMalloc_Central_FreeList::ShrinkCache(int locked_size_class, bool force) {
2136   // Start with a quick check without taking a lock.
2137   if (cache_size_ == 0) return false;
2138   // We don't evict from a full cache unless we are 'forcing'.
2139   if (force == false && used_slots_ == cache_size_) return false;
2140 
2141   // Grab lock, but first release the other lock held by this thread.  We use
2142   // the lock inverter to ensure that we never hold two size class locks
2143   // concurrently.  That can create a deadlock because there is no well
2144   // defined nesting order.
2145   LockInverter li(&central_cache[locked_size_class].lock_, &lock_);
2146   ASSERT(used_slots_ <= cache_size_);
2147   ASSERT(0 <= cache_size_);
2148   if (cache_size_ == 0) return false;
2149   if (used_slots_ == cache_size_) {
2150     if (force == false) return false;
2151     // ReleaseListToSpans releases the lock, so we have to make all the
2152     // updates to the central list before calling it.
2153     cache_size_--;
2154     used_slots_--;
2155     ReleaseListToSpans(tc_slots_[used_slots_].head);
2156     return true;
2157   }
2158   cache_size_--;
2159   return true;
2160 }
2161 
2162 void TCMalloc_Central_FreeList::InsertRange(void *start, void *end, int N) {
2163   SpinLockHolder h(&lock_);
2164   if (N == num_objects_to_move[size_class_] &&
2165     MakeCacheSpace()) {
2166     int slot = used_slots_++;
2167     ASSERT(slot >=0);
2168     ASSERT(slot < kNumTransferEntries);
2169     TCEntry *entry = &tc_slots_[slot];
2170     entry->head = start;
2171     entry->tail = end;
2172     return;
2173   }
2174   ReleaseListToSpans(start);
2175 }
2176 
2177 void TCMalloc_Central_FreeList::RemoveRange(void **start, void **end, int *N) {
2178   int num = *N;
2179   ASSERT(num > 0);
2180 
2181   SpinLockHolder h(&lock_);
2182   if (num == num_objects_to_move[size_class_] && used_slots_ > 0) {
2183     int slot = --used_slots_;
2184     ASSERT(slot >= 0);
2185     TCEntry *entry = &tc_slots_[slot];
2186     *start = entry->head;
2187     *end = entry->tail;
2188     return;
2189   }
2190 
2191   // TODO: Prefetch multiple TCEntries?
2192   void *tail = FetchFromSpansSafe();
2193   if (!tail) {
2194     // We are completely out of memory.
2195     *start = *end = NULL;
2196     *N = 0;
2197     return;
2198   }
2199 
2200   SLL_SetNext(tail, NULL);
2201   void *head = tail;
2202   int count = 1;
2203   while (count < num) {
2204     void *t = FetchFromSpans();
2205     if (!t) break;
2206     SLL_Push(&head, t);
2207     count++;
2208   }
2209   *start = head;
2210   *end = tail;
2211   *N = count;
2212 }
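// Sketch of the transfer-cache fast path above (illustrative): when the
// request is for exactly num_objects_to_move[size_class_] objects and a
// tc_slots_ entry is populated, RemoveRange hands back a prepackaged
// head/tail chain without touching any span; otherwise it falls back to
// FetchFromSpansSafe()/FetchFromSpans() and builds the chain one object at a
// time.  InsertRange above is the mirror image: batches of the canonical size
// are parked in tc_slots_, and anything else is released straight back to the
// spans.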
2213 
2214 
2215 void* TCMalloc_Central_FreeList::FetchFromSpansSafe() {
2216   void *t = FetchFromSpans();
2217   if (!t) {
2218     Populate();
2219     t = FetchFromSpans();
2220   }
2221   return t;
2222 }
2223 
2224 void* TCMalloc_Central_FreeList::FetchFromSpans() {
2225   if (DLL_IsEmpty(&nonempty_)) return NULL;
2226   Span* span = nonempty_.next;
2227 
2228   ASSERT(span->objects != NULL);
2229   ASSERT_SPAN_COMMITTED(span);
2230   span->refcount++;
2231   void* result = span->objects;
2232   span->objects = *(reinterpret_cast<void**>(result));
2233   if (span->objects == NULL) {
2234     // Move to empty list
2235     DLL_Remove(span);
2236     DLL_Prepend(&empty_, span);
2237     Event(span, 'E', 0);
2238   }
2239   counter_--;
2240   return result;
2241 }
2242 
2243 // Fetch memory from the system and add to the central cache freelist.
2244 ALWAYS_INLINE void TCMalloc_Central_FreeList::Populate() {
2245   // Release central list lock while operating on pageheap
2246   lock_.Unlock();
2247   const size_t npages = class_to_pages[size_class_];
2248 
2249   Span* span;
2250   {
2251     SpinLockHolder h(&pageheap_lock);
2252     span = pageheap->New(npages);
2253     if (span) pageheap->RegisterSizeClass(span, size_class_);
2254   }
2255   if (span == NULL) {
2256     MESSAGE("allocation failed: %d\n", errno);
2257     lock_.Lock();
2258     return;
2259   }
2260   ASSERT_SPAN_COMMITTED(span);
2261   ASSERT(span->length == npages);
2262   // Cache sizeclass info eagerly.  Locking is not necessary.
2263   // (Instead of being eager, we could just replace any stale info
2264   // about this span, but that seems to be no better in practice.)
2265   for (size_t i = 0; i < npages; i++) {
2266     pageheap->CacheSizeClass(span->start + i, size_class_);
2267   }
2268 
2269   // Split the block into pieces and add to the free-list
2270   // TODO: coloring of objects to avoid cache conflicts?
2271   void** tail = &span->objects;
2272   char* ptr = reinterpret_cast<char*>(span->start << kPageShift);
2273   char* limit = ptr + (npages << kPageShift);
2274   const size_t size = ByteSizeForClass(size_class_);
2275   int num = 0;
2276   char* nptr;
2277   while ((nptr = ptr + size) <= limit) {
2278     *tail = ptr;
2279     tail = reinterpret_cast<void**>(ptr);
2280     ptr = nptr;
2281     num++;
2282   }
2283   ASSERT(ptr <= limit);
2284   *tail = NULL;
2285   span->refcount = 0; // No sub-object in use yet
2286 
2287   // Add span to list of non-empty spans
2288   lock_.Lock();
2289   DLL_Prepend(&nonempty_, span);
2290   counter_ += num;
2291 }
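// Worked example of the splitting loop above (illustrative values only): if
// class_to_pages[size_class_] were one 4096-byte page and
// ByteSizeForClass(size_class_) were 256, the loop would thread a singly
// linked list through the span's own memory and produce num == 16 objects;
// counter_ then grows by 16 once lock_ is re-taken.  No separate node storage
// is needed because the first word of each free object holds its "next"
// pointer.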
2292 
2293 //-------------------------------------------------------------------
2294 // TCMalloc_ThreadCache implementation
2295 //-------------------------------------------------------------------
2296 
2297 inline bool TCMalloc_ThreadCache::SampleAllocation(size_t k) {
2298   if (bytes_until_sample_ < k) {
2299     PickNextSample(k);
2300     return true;
2301   } else {
2302     bytes_until_sample_ -= k;
2303     return false;
2304   }
2305 }
2306 
2307 void TCMalloc_ThreadCache::Init(ThreadIdentifier tid) {
2308   size_ = 0;
2309   next_ = NULL;
2310   prev_ = NULL;
2311   tid_  = tid;
2312   in_setspecific_ = false;
2313   for (size_t cl = 0; cl < kNumClasses; ++cl) {
2314     list_[cl].Init();
2315   }
2316 
2317   // Initialize RNG -- run it for a bit to get to good values
2318   bytes_until_sample_ = 0;
2319   rnd_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this));
2320   for (int i = 0; i < 100; i++) {
2321     PickNextSample(static_cast<size_t>(FLAGS_tcmalloc_sample_parameter * 2));
2322   }
2323 }
2324 
2325 void TCMalloc_ThreadCache::Cleanup() {
2326   // Put unused memory back into central cache
2327   for (size_t cl = 0; cl < kNumClasses; ++cl) {
2328     if (list_[cl].length() > 0) {
2329       ReleaseToCentralCache(cl, list_[cl].length());
2330     }
2331   }
2332 }
2333 
2334 ALWAYS_INLINE void* TCMalloc_ThreadCache::Allocate(size_t size) {
2335   ASSERT(size <= kMaxSize);
2336   const size_t cl = SizeClass(size);
2337   FreeList* list = &list_[cl];
2338   size_t allocationSize = ByteSizeForClass(cl);
2339   if (list->empty()) {
2340     FetchFromCentralCache(cl, allocationSize);
2341     if (list->empty()) return NULL;
2342   }
2343   size_ -= allocationSize;
2344   return list->Pop();
2345 }
2346 
2347 inline void TCMalloc_ThreadCache::Deallocate(void* ptr, size_t cl) {
2348   size_ += ByteSizeForClass(cl);
2349   FreeList* list = &list_[cl];
2350   list->Push(ptr);
2351   // If enough data is free, put back into central cache
2352   if (list->length() > kMaxFreeListLength) {
2353     ReleaseToCentralCache(cl, num_objects_to_move[cl]);
2354   }
2355   if (size_ >= per_thread_cache_size) Scavenge();
2356 }
2357 
2358 // Remove some objects of class "cl" from central cache and add to thread heap
2359 ALWAYS_INLINE void TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl, size_t allocationSize) {
2360   int fetch_count = num_objects_to_move[cl];
2361   void *start, *end;
2362   central_cache[cl].RemoveRange(&start, &end, &fetch_count);
2363   list_[cl].PushRange(fetch_count, start, end);
2364   size_ += allocationSize * fetch_count;
2365 }
2366 
2367 // Remove some objects of class "cl" from thread heap and add to central cache
2368 inline void TCMalloc_ThreadCache::ReleaseToCentralCache(size_t cl, int N) {
2369   ASSERT(N > 0);
2370   FreeList* src = &list_[cl];
2371   if (N > src->length()) N = src->length();
2372   size_ -= N*ByteSizeForClass(cl);
2373 
2374   // We return prepackaged chains of the correct size to the central cache.
2375   // TODO: Use the same format internally in the thread caches?
2376   int batch_size = num_objects_to_move[cl];
2377   while (N > batch_size) {
2378     void *tail, *head;
2379     src->PopRange(batch_size, &head, &tail);
2380     central_cache[cl].InsertRange(head, tail, batch_size);
2381     N -= batch_size;
2382   }
2383   void *tail, *head;
2384   src->PopRange(N, &head, &tail);
2385   central_cache[cl].InsertRange(head, tail, N);
2386 }
2387 
2388 // Release idle memory to the central cache
2389 inline void TCMalloc_ThreadCache::Scavenge() {
2390   // If the low-water mark for the free list is L, it means we would
2391   // not have had to allocate anything from the central cache even if
2392   // we had reduced the free list size by L.  We aim to get closer to
2393   // that situation by dropping L/2 nodes from the free list.  This
2394   // may not release much memory, but if so we will call scavenge again
2395   // pretty soon and the low-water marks will be high on that call.
2396   //int64 start = CycleClock::Now();
2397 
2398   for (size_t cl = 0; cl < kNumClasses; cl++) {
2399     FreeList* list = &list_[cl];
2400     const int lowmark = list->lowwatermark();
2401     if (lowmark > 0) {
2402       const int drop = (lowmark > 1) ? lowmark/2 : 1;
2403       ReleaseToCentralCache(cl, drop);
2404     }
2405     list->clear_lowwatermark();
2406   }
2407 
2408   //int64 finish = CycleClock::Now();
2409   //CycleTimer ct;
2410   //MESSAGE("GC: %.0f ns\n", ct.CyclesToUsec(finish-start)*1000.0);
2411 }
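// Illustrative example of the policy above: if a size class finished the last
// interval with lowwatermark() == 8, the thread never dipped into those eight
// cached objects, so Scavenge returns drop == 8/2 == 4 of them to the central
// cache and then clears the mark; a mark of 1 still releases a single object.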
2412 
2413 void TCMalloc_ThreadCache::PickNextSample(size_t k) {
2414   // Make next "random" number
2415   // x^32+x^22+x^2+x^1+1 is a primitive polynomial for random numbers
2416   static const uint32_t kPoly = (1 << 22) | (1 << 2) | (1 << 1) | (1 << 0);
2417   uint32_t r = rnd_;
2418   rnd_ = (r << 1) ^ ((static_cast<int32_t>(r) >> 31) & kPoly);
2419 
2420   // Next point is "rnd_ % (sample_period)".  I.e., average
2421   // increment is "sample_period/2".
2422   const int flag_value = static_cast<int>(FLAGS_tcmalloc_sample_parameter);
2423   static int last_flag_value = -1;
2424 
2425   if (flag_value != last_flag_value) {
2426     SpinLockHolder h(&sample_period_lock);
2427     int i;
2428     for (i = 0; i < (static_cast<int>(sizeof(primes_list)/sizeof(primes_list[0])) - 1); i++) {
2429       if (primes_list[i] >= flag_value) {
2430         break;
2431       }
2432     }
2433     sample_period = primes_list[i];
2434     last_flag_value = flag_value;
2435   }
2436 
2437   bytes_until_sample_ += rnd_ % sample_period;
2438 
2439   if (k > (static_cast<size_t>(-1) >> 2)) {
2440     // If the user has asked for a huge allocation then it is possible
2441     // for the code below to loop infinitely.  Just return (note that
2442     // this throws off the sampling accuracy somewhat, but a user who
2443     // is allocating more than 1G of memory at a time can live with a
2444     // minor inaccuracy in profiling of small allocations, and also
2445     // would rather not wait for the loop below to terminate).
2446     return;
2447   }
2448 
2449   while (bytes_until_sample_ < k) {
2450     // Increase bytes_until_sample_ by enough average sampling periods
2451     // (sample_period >> 1) to allow us to sample past the current
2452     // allocation.
2453     bytes_until_sample_ += (sample_period >> 1);
2454   }
2455 
2456   bytes_until_sample_ -= k;
2457 }
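// Illustrative reading of the arithmetic above: rnd_ advances by one step of a
// 32-bit LFSR, and "rnd_ % sample_period" adds a roughly uniform value in
// [0, sample_period), so successive sampling points land on average
// sample_period / 2 bytes apart, matching the comment near the top of this
// function.  The trailing while loop only matters when a single allocation of
// k bytes would overshoot the freshly chosen sampling point.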
2458 
2459 void TCMalloc_ThreadCache::InitModule() {
2460   // There is a slight potential race here because of the double-checked
2461   // locking idiom.  However, as long as the program does a small
2462   // allocation before switching to multi-threaded mode, we will be
2463   // fine.  We increase the chances of doing such a small allocation
2464   // by doing one in the constructor of the module_enter_exit_hook
2465   // object declared below.
2466   SpinLockHolder h(&pageheap_lock);
2467   if (!phinited) {
2468 #ifdef WTF_CHANGES
2469     InitTSD();
2470 #endif
2471     InitSizeClasses();
2472     threadheap_allocator.Init();
2473     span_allocator.Init();
2474     span_allocator.New(); // Reduce cache conflicts
2475     span_allocator.New(); // Reduce cache conflicts
2476     stacktrace_allocator.Init();
2477     DLL_Init(&sampled_objects);
2478     for (size_t i = 0; i < kNumClasses; ++i) {
2479       central_cache[i].Init(i);
2480     }
2481     pageheap->init();
2482     phinited = 1;
2483 #if defined(WTF_CHANGES) && PLATFORM(DARWIN)
2484     FastMallocZone::init();
2485 #endif
2486   }
2487 }
2488 
2489 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::NewHeap(ThreadIdentifier tid) {
2490   // Create the heap and add it to the linked list
2491   TCMalloc_ThreadCache *heap = threadheap_allocator.New();
2492   heap->Init(tid);
2493   heap->next_ = thread_heaps;
2494   heap->prev_ = NULL;
2495   if (thread_heaps != NULL) thread_heaps->prev_ = heap;
2496   thread_heaps = heap;
2497   thread_heap_count++;
2498   RecomputeThreadCacheSize();
2499   return heap;
2500 }
2501 
2502 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetThreadHeap() {
2503 #ifdef HAVE_TLS
2504     // __thread is faster, but only when the kernel supports it
2505   if (KernelSupportsTLS())
2506     return threadlocal_heap;
2507 #elif COMPILER(MSVC)
2508     return static_cast<TCMalloc_ThreadCache*>(TlsGetValue(tlsIndex));
2509 #else
2510     return static_cast<TCMalloc_ThreadCache*>(pthread_getspecific(heap_key));
2511 #endif
2512 }
2513 
2514 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() {
2515   TCMalloc_ThreadCache* ptr = NULL;
2516   if (!tsd_inited) {
2517     InitModule();
2518   } else {
2519     ptr = GetThreadHeap();
2520   }
2521   if (ptr == NULL) ptr = CreateCacheIfNecessary();
2522   return ptr;
2523 }
2524 
2525 // In deletion paths, we do not try to create a thread-cache.  This is
2526 // because we may be in the thread destruction code and may have
2527 // already cleaned up the cache for this thread.
2528 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCacheIfPresent() {
2529   if (!tsd_inited) return NULL;
2530   void* const p = GetThreadHeap();
2531   return reinterpret_cast<TCMalloc_ThreadCache*>(p);
2532 }
2533 
2534 void TCMalloc_ThreadCache::InitTSD() {
2535   ASSERT(!tsd_inited);
2536   pthread_key_create(&heap_key, DestroyThreadCache);
2537 #if COMPILER(MSVC)
2538   tlsIndex = TlsAlloc();
2539 #endif
2540   tsd_inited = true;
2541 
2542 #if !COMPILER(MSVC)
2543   // We may have used a fake pthread_t for the main thread.  Fix it.
2544   pthread_t zero;
2545   memset(&zero, 0, sizeof(zero));
2546 #endif
2547 #ifndef WTF_CHANGES
2548   SpinLockHolder h(&pageheap_lock);
2549 #else
2550   ASSERT(pageheap_lock.IsHeld());
2551 #endif
2552   for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
2553 #if COMPILER(MSVC)
2554     if (h->tid_ == 0) {
2555       h->tid_ = GetCurrentThreadId();
2556     }
2557 #else
2558     if (pthread_equal(h->tid_, zero)) {
2559       h->tid_ = pthread_self();
2560     }
2561 #endif
2562   }
2563 }
2564 
2565 TCMalloc_ThreadCache* TCMalloc_ThreadCache::CreateCacheIfNecessary() {
2566   // Initialize per-thread data if necessary
2567   TCMalloc_ThreadCache* heap = NULL;
2568   {
2569     SpinLockHolder h(&pageheap_lock);
2570 
2571 #if COMPILER(MSVC)
2572     DWORD me;
2573     if (!tsd_inited) {
2574       me = 0;
2575     } else {
2576       me = GetCurrentThreadId();
2577     }
2578 #else
2579     // Early on in glibc's life, we cannot even call pthread_self()
2580     pthread_t me;
2581     if (!tsd_inited) {
2582       memset(&me, 0, sizeof(me));
2583     } else {
2584       me = pthread_self();
2585     }
2586 #endif
2587 
2588     // This may be a recursive malloc call from pthread_setspecific().
2589     // In that case, the heap for this thread has already been created
2590     // and added to the linked list.  So we search for that first.
2591     for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
2592 #if COMPILER(MSVC)
2593       if (h->tid_ == me) {
2594 #else
2595       if (pthread_equal(h->tid_, me)) {
2596 #endif
2597         heap = h;
2598         break;
2599       }
2600     }
2601 
2602     if (heap == NULL) heap = NewHeap(me);
2603   }
2604 
2605   // We call pthread_setspecific() outside the lock because it may
2606   // call malloc() recursively.  The recursive call will never get
2607   // here again because it will find the already allocated heap in the
2608   // linked list of heaps.
2609   if (!heap->in_setspecific_ && tsd_inited) {
2610     heap->in_setspecific_ = true;
2611     setThreadHeap(heap);
2612   }
2613   return heap;
2614 }
2615 
2616 void TCMalloc_ThreadCache::BecomeIdle() {
2617   if (!tsd_inited) return;              // No caches yet
2618   TCMalloc_ThreadCache* heap = GetThreadHeap();
2619   if (heap == NULL) return;             // No thread cache to remove
2620   if (heap->in_setspecific_) return;    // Do not disturb the active caller
2621 
2622   heap->in_setspecific_ = true;
2623   pthread_setspecific(heap_key, NULL);
2624 #ifdef HAVE_TLS
2625   // Also update the copy in __thread
2626   threadlocal_heap = NULL;
2627 #endif
2628   heap->in_setspecific_ = false;
2629   if (GetThreadHeap() == heap) {
2630     // Somehow heap got reinstated by a recursive call to malloc
2631     // from pthread_setspecific.  We give up in this case.
2632     return;
2633   }
2634 
2635   // We can now get rid of the heap
2636   DeleteCache(heap);
2637 }
2638 
2639 void TCMalloc_ThreadCache::DestroyThreadCache(void* ptr) {
2640   // Note that "ptr" cannot be NULL since pthread promises not
2641   // to invoke the destructor on NULL values, but for safety,
2642   // we check anyway.
2643   if (ptr == NULL) return;
2644 #ifdef HAVE_TLS
2645   // Prevent fast path of GetThreadHeap() from returning heap.
2646   threadlocal_heap = NULL;
2647 #endif
2648   DeleteCache(reinterpret_cast<TCMalloc_ThreadCache*>(ptr));
2649 }
2650 
2651 void TCMalloc_ThreadCache::DeleteCache(TCMalloc_ThreadCache* heap) {
2652   // Remove all memory from heap
2653   heap->Cleanup();
2654 
2655   // Remove from linked list
2656   SpinLockHolder h(&pageheap_lock);
2657   if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_;
2658   if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_;
2659   if (thread_heaps == heap) thread_heaps = heap->next_;
2660   thread_heap_count--;
2661   RecomputeThreadCacheSize();
2662 
2663   threadheap_allocator.Delete(heap);
2664 }
2665 
2666 void TCMalloc_ThreadCache::RecomputeThreadCacheSize() {
2667   // Divide available space across threads
2668   int n = thread_heap_count > 0 ? thread_heap_count : 1;
2669   size_t space = overall_thread_cache_size / n;
2670 
2671   // Limit to allowed range
2672   if (space < kMinThreadCacheSize) space = kMinThreadCacheSize;
2673   if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize;
2674 
2675   per_thread_cache_size = space;
2676 }
2677 
2678 void TCMalloc_ThreadCache::Print() const {
2679   for (size_t cl = 0; cl < kNumClasses; ++cl) {
2680     MESSAGE("      %5" PRIuS " : %4d len; %4d lo\n",
2681             ByteSizeForClass(cl),
2682             list_[cl].length(),
2683             list_[cl].lowwatermark());
2684   }
2685 }
2686 
2687 // Extract interesting stats
2688 struct TCMallocStats {
2689   uint64_t system_bytes;        // Bytes alloced from system
2690   uint64_t thread_bytes;        // Bytes in thread caches
2691   uint64_t central_bytes;       // Bytes in central cache
2692   uint64_t transfer_bytes;      // Bytes in central transfer cache
2693   uint64_t pageheap_bytes;      // Bytes in page heap
2694   uint64_t metadata_bytes;      // Bytes alloced for metadata
2695 };
2696 
2697 #ifndef WTF_CHANGES
2698 // Get stats into "r".  Also get per-size-class counts if class_count != NULL
2699 static void ExtractStats(TCMallocStats* r, uint64_t* class_count) {
2700   r->central_bytes = 0;
2701   r->transfer_bytes = 0;
2702   for (int cl = 0; cl < kNumClasses; ++cl) {
2703     const int length = central_cache[cl].length();
2704     const int tc_length = central_cache[cl].tc_length();
2705     r->central_bytes += static_cast<uint64_t>(ByteSizeForClass(cl)) * length;
2706     r->transfer_bytes +=
2707       static_cast<uint64_t>(ByteSizeForClass(cl)) * tc_length;
2708     if (class_count) class_count[cl] = length + tc_length;
2709   }
2710 
2711   // Add stats from per-thread heaps
2712   r->thread_bytes = 0;
2713   { // scope
2714     SpinLockHolder h(&pageheap_lock);
2715     for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
2716       r->thread_bytes += h->Size();
2717       if (class_count) {
2718         for (size_t cl = 0; cl < kNumClasses; ++cl) {
2719           class_count[cl] += h->freelist_length(cl);
2720         }
2721       }
2722     }
2723   }
2724 
2725   { //scope
2726     SpinLockHolder h(&pageheap_lock);
2727     r->system_bytes = pageheap->SystemBytes();
2728     r->metadata_bytes = metadata_system_bytes;
2729     r->pageheap_bytes = pageheap->FreeBytes();
2730   }
2731 }
2732 #endif
2733 
2734 #ifndef WTF_CHANGES
2735 // WRITE stats to "out"
2736 static void DumpStats(TCMalloc_Printer* out, int level) {
2737   TCMallocStats stats;
2738   uint64_t class_count[kNumClasses];
2739   ExtractStats(&stats, (level >= 2 ? class_count : NULL));
2740 
2741   if (level >= 2) {
2742     out->printf("------------------------------------------------\n");
2743     uint64_t cumulative = 0;
2744     for (int cl = 0; cl < kNumClasses; ++cl) {
2745       if (class_count[cl] > 0) {
2746         uint64_t class_bytes = class_count[cl] * ByteSizeForClass(cl);
2747         cumulative += class_bytes;
2748         out->printf("class %3d [ %8" PRIuS " bytes ] : "
2749                 "%8" PRIu64 " objs; %5.1f MB; %5.1f cum MB\n",
2750                 cl, ByteSizeForClass(cl),
2751                 class_count[cl],
2752                 class_bytes / 1048576.0,
2753                 cumulative / 1048576.0);
2754       }
2755     }
2756 
2757     SpinLockHolder h(&pageheap_lock);
2758     pageheap->Dump(out);
2759   }
2760 
2761   const uint64_t bytes_in_use = stats.system_bytes
2762                                 - stats.pageheap_bytes
2763                                 - stats.central_bytes
2764                                 - stats.transfer_bytes
2765                                 - stats.thread_bytes;
2766 
2767   out->printf("------------------------------------------------\n"
2768               "MALLOC: %12" PRIu64 " Heap size\n"
2769               "MALLOC: %12" PRIu64 " Bytes in use by application\n"
2770               "MALLOC: %12" PRIu64 " Bytes free in page heap\n"
2771               "MALLOC: %12" PRIu64 " Bytes free in central cache\n"
2772               "MALLOC: %12" PRIu64 " Bytes free in transfer cache\n"
2773               "MALLOC: %12" PRIu64 " Bytes free in thread caches\n"
2774               "MALLOC: %12" PRIu64 " Spans in use\n"
2775               "MALLOC: %12" PRIu64 " Thread heaps in use\n"
2776               "MALLOC: %12" PRIu64 " Metadata allocated\n"
2777               "------------------------------------------------\n",
2778               stats.system_bytes,
2779               bytes_in_use,
2780               stats.pageheap_bytes,
2781               stats.central_bytes,
2782               stats.transfer_bytes,
2783               stats.thread_bytes,
2784               uint64_t(span_allocator.inuse()),
2785               uint64_t(threadheap_allocator.inuse()),
2786               stats.metadata_bytes);
2787 }
2788 
2789 static void PrintStats(int level) {
2790   const int kBufferSize = 16 << 10;
2791   char* buffer = new char[kBufferSize];
2792   TCMalloc_Printer printer(buffer, kBufferSize);
2793   DumpStats(&printer, level);
2794   write(STDERR_FILENO, buffer, strlen(buffer));
2795   delete[] buffer;
2796 }
2797 
2798 static void** DumpStackTraces() {
2799   // Count how much space we need
2800   int needed_slots = 0;
2801   {
2802     SpinLockHolder h(&pageheap_lock);
2803     for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) {
2804       StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects);
2805       needed_slots += 3 + stack->depth;
2806     }
2807     needed_slots += 100;            // Slop in case sample grows
2808     needed_slots += needed_slots/8; // An extra 12.5% slop
2809   }
2810 
2811   void** result = new void*[needed_slots];
2812   if (result == NULL) {
2813     MESSAGE("tcmalloc: could not allocate %d slots for stack traces\n",
2814             needed_slots);
2815     return NULL;
2816   }
2817 
2818   SpinLockHolder h(&pageheap_lock);
2819   int used_slots = 0;
2820   for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) {
2821     ASSERT(used_slots < needed_slots);  // Need to leave room for terminator
2822     StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects);
2823     if (used_slots + 3 + stack->depth >= needed_slots) {
2824       // No more room
2825       break;
2826     }
2827 
2828     result[used_slots+0] = reinterpret_cast<void*>(static_cast<uintptr_t>(1));
2829     result[used_slots+1] = reinterpret_cast<void*>(stack->size);
2830     result[used_slots+2] = reinterpret_cast<void*>(stack->depth);
2831     for (int d = 0; d < stack->depth; d++) {
2832       result[used_slots+3+d] = stack->stack[d];
2833     }
2834     used_slots += 3 + stack->depth;
2835   }
2836   result[used_slots] = reinterpret_cast<void*>(static_cast<uintptr_t>(0));
2837   return result;
2838 }
2839 #endif
2840 
2841 #ifndef WTF_CHANGES
2842 
2843 // TCMalloc's support for extra malloc interfaces
2844 class TCMallocImplementation : public MallocExtension {
2845  public:
2846   virtual void GetStats(char* buffer, int buffer_length) {
2847     ASSERT(buffer_length > 0);
2848     TCMalloc_Printer printer(buffer, buffer_length);
2849 
2850     // Print level one stats unless lots of space is available
2851     if (buffer_length < 10000) {
2852       DumpStats(&printer, 1);
2853     } else {
2854       DumpStats(&printer, 2);
2855     }
2856   }
2857 
2858   virtual void** ReadStackTraces() {
2859     return DumpStackTraces();
2860   }
2861 
2862   virtual bool GetNumericProperty(const char* name, size_t* value) {
2863     ASSERT(name != NULL);
2864 
2865     if (strcmp(name, "generic.current_allocated_bytes") == 0) {
2866       TCMallocStats stats;
2867       ExtractStats(&stats, NULL);
2868       *value = stats.system_bytes
2869                - stats.thread_bytes
2870                - stats.central_bytes
2871                - stats.pageheap_bytes;
2872       return true;
2873     }
2874 
2875     if (strcmp(name, "generic.heap_size") == 0) {
2876       TCMallocStats stats;
2877       ExtractStats(&stats, NULL);
2878       *value = stats.system_bytes;
2879       return true;
2880     }
2881 
2882     if (strcmp(name, "tcmalloc.slack_bytes") == 0) {
2883       // We assume that bytes in the page heap are not fragmented too
2884       // badly, and are therefore available for allocation.
2885       SpinLockHolder l(&pageheap_lock);
2886       *value = pageheap->FreeBytes();
2887       return true;
2888     }
2889 
2890     if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) {
2891       SpinLockHolder l(&pageheap_lock);
2892       *value = overall_thread_cache_size;
2893       return true;
2894     }
2895 
2896     if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes") == 0) {
2897       TCMallocStats stats;
2898       ExtractStats(&stats, NULL);
2899       *value = stats.thread_bytes;
2900       return true;
2901     }
2902 
2903     return false;
2904   }
2905 
2906   virtual bool SetNumericProperty(const char* name, size_t value) {
2907     ASSERT(name != NULL);
2908 
2909     if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) {
2910       // Clip the value to a reasonable range
2911       if (value < kMinThreadCacheSize) value = kMinThreadCacheSize;
2912       if (value > (1<<30)) value = (1<<30);     // Limit to 1GB
2913 
2914       SpinLockHolder l(&pageheap_lock);
2915       overall_thread_cache_size = static_cast<size_t>(value);
2916       TCMalloc_ThreadCache::RecomputeThreadCacheSize();
2917       return true;
2918     }
2919 
2920     return false;
2921   }
2922 
2923   virtual void MarkThreadIdle() {
2924     TCMalloc_ThreadCache::BecomeIdle();
2925   }
2926 
2927   virtual void ReleaseFreeMemory() {
2928     SpinLockHolder h(&pageheap_lock);
2929     pageheap->ReleaseFreePages();
2930   }
2931 };
2932 #endif
2933 
2934 // The constructor allocates an object to ensure that initialization
2935 // runs before main(), and therefore we do not have a chance to become
2936 // multi-threaded before initialization.  We also create the TSD key
2937 // here.  Presumably by the time this constructor runs, glibc is in
2938 // good enough shape to handle pthread_key_create().
2939 //
2940 // The constructor also takes the opportunity to tell STL to use
2941 // tcmalloc.  We want to do this early, before construct time, so
2942 // all user STL allocations go through tcmalloc (which works really
2943 // well for STL).
2944 //
2945 // The destructor prints stats when the program exits.
2946 class TCMallocGuard {
2947  public:
2948 
2949   TCMallocGuard() {
2950 #ifdef HAVE_TLS    // this is true if the cc/ld/libc combo support TLS
2951     // Check whether the kernel also supports TLS (needs to happen at runtime)
2952     CheckIfKernelSupportsTLS();
2953 #endif
2954 #ifndef WTF_CHANGES
2955 #ifdef WIN32                    // patch the windows VirtualAlloc, etc.
2956     PatchWindowsFunctions();    // defined in windows/patch_functions.cc
2957 #endif
2958 #endif
2959     free(malloc(1));
2960     TCMalloc_ThreadCache::InitTSD();
2961     free(malloc(1));
2962 #ifndef WTF_CHANGES
2963     MallocExtension::Register(new TCMallocImplementation);
2964 #endif
2965   }
2966 
2967 #ifndef WTF_CHANGES
2968   ~TCMallocGuard() {
2969     const char* env = getenv("MALLOCSTATS");
2970     if (env != NULL) {
2971       int level = atoi(env);
2972       if (level < 1) level = 1;
2973       PrintStats(level);
2974     }
2975 #ifdef WIN32
2976     UnpatchWindowsFunctions();
2977 #endif
2978   }
2979 #endif
2980 };
2981 
2982 #ifndef WTF_CHANGES
2983 static TCMallocGuard module_enter_exit_hook;
2984 #endif
2985 
2986 
2987 //-------------------------------------------------------------------
2988 // Helpers for the exported routines below
2989 //-------------------------------------------------------------------
2990 
2991 #ifndef WTF_CHANGES
2992 
2993 static Span* DoSampledAllocation(size_t size) {
2994 
2995   // Grab the stack trace outside the heap lock
2996   StackTrace tmp;
2997   tmp.depth = GetStackTrace(tmp.stack, kMaxStackDepth, 1);
2998   tmp.size = size;
2999 
3000   SpinLockHolder h(&pageheap_lock);
3001   // Allocate span
3002   Span *span = pageheap->New(pages(size == 0 ? 1 : size));
3003   if (span == NULL) {
3004     return NULL;
3005   }
3006 
3007   // Allocate stack trace
3008   StackTrace *stack = stacktrace_allocator.New();
3009   if (stack == NULL) {
3010     // Sampling failed because of lack of memory
3011     return span;
3012   }
3013 
3014   *stack = tmp;
3015   span->sample = 1;
3016   span->objects = stack;
3017   DLL_Prepend(&sampled_objects, span);
3018 
3019   return span;
3020 }
3021 #endif
3022 
3023 static inline bool CheckCachedSizeClass(void *ptr) {
3024   PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
3025   size_t cached_value = pageheap->GetSizeClassIfCached(p);
3026   return cached_value == 0 ||
3027       cached_value == pageheap->GetDescriptor(p)->sizeclass;
3028 }
3029 
3030 static inline void* CheckedMallocResult(void *result)
3031 {
3032   ASSERT(result == 0 || CheckCachedSizeClass(result));
3033   return result;
3034 }
3035 
3036 static inline void* SpanToMallocResult(Span *span) {
3037   ASSERT_SPAN_COMMITTED(span);
3038   pageheap->CacheSizeClass(span->start, 0);
3039   return
3040       CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift));
3041 }
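// Illustrative arithmetic (assuming kPageShift == 12, i.e. 4K pages): a span
// whose start field is 0x12345 corresponds to the user address
// 0x12345 << 12 == 0x12345000, and CacheSizeClass(span->start, 0) marks the
// cached entry for that page as "no size class".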
3042 
3043 #ifdef WTF_CHANGES
3044 template <bool crashOnFailure>
3045 #endif
3046 static ALWAYS_INLINE void* do_malloc(size_t size) {
3047   void* ret = NULL;
3048 
3049 #ifdef WTF_CHANGES
3050     ASSERT(!isForbidden());
3051 #endif
3052 
3053   // The following call forces module initialization
3054   TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
3055 #ifndef WTF_CHANGES
3056   if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
3057     Span* span = DoSampledAllocation(size);
3058     if (span != NULL) {
3059       ret = SpanToMallocResult(span);
3060     }
3061   } else
3062 #endif
3063   if (size > kMaxSize) {
3064     // Use page-level allocator
3065     SpinLockHolder h(&pageheap_lock);
3066     Span* span = pageheap->New(pages(size));
3067     if (span != NULL) {
3068       ret = SpanToMallocResult(span);
3069     }
3070   } else {
3071     // The common case, and also the simplest.  This just pops the
3072     // size-appropriate freelist, after replenishing it if it's empty.
3073     ret = CheckedMallocResult(heap->Allocate(size));
3074   }
3075   if (!ret) {
3076 #ifdef WTF_CHANGES
3077     if (crashOnFailure) // This branch should be optimized out by the compiler.
3078         CRASH();
3079 #else
3080     errno = ENOMEM;
3081 #endif
3082   }
3083   return ret;
3084 }
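// Call-path sketch for do_malloc (sizes are illustrative, not exact class
// boundaries): a small request such as do_malloc(40) is served by popping the
// calling thread's free list for the matching size class; a request larger
// than kMaxSize (e.g. a multi-megabyte buffer) goes straight to the page heap
// under pageheap_lock; and in non-WTF builds an occasional request is diverted
// through DoSampledAllocation() for heap profiling.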
3085 
3086 static ALWAYS_INLINE void do_free(void* ptr) {
3087   if (ptr == NULL) return;
3088   ASSERT(pageheap != NULL);  // Should not call free() before malloc()
3089   const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
3090   Span* span = NULL;
3091   size_t cl = pageheap->GetSizeClassIfCached(p);
3092 
3093   if (cl == 0) {
3094     span = pageheap->GetDescriptor(p);
3095     cl = span->sizeclass;
3096     pageheap->CacheSizeClass(p, cl);
3097   }
3098   if (cl != 0) {
3099 #ifndef NO_TCMALLOC_SAMPLES
3100     ASSERT(!pageheap->GetDescriptor(p)->sample);
3101 #endif
3102     TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCacheIfPresent();
3103     if (heap != NULL) {
3104       heap->Deallocate(ptr, cl);
3105     } else {
3106       // Delete directly into central cache
3107       SLL_SetNext(ptr, NULL);
3108       central_cache[cl].InsertRange(ptr, ptr, 1);
3109     }
3110   } else {
3111     SpinLockHolder h(&pageheap_lock);
3112     ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
3113     ASSERT(span != NULL && span->start == p);
3114 #ifndef NO_TCMALLOC_SAMPLES
3115     if (span->sample) {
3116       DLL_Remove(span);
3117       stacktrace_allocator.Delete(reinterpret_cast<StackTrace*>(span->objects));
3118       span->objects = NULL;
3119     }
3120 #endif
3121     pageheap->Delete(span);
3122   }
3123 }
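// do_free mirrors do_malloc: small objects (nonzero size class) are pushed
// back onto the thread cache, or directly onto the central free list when the
// calling thread has no cache; large objects (size class 0) must be
// page-aligned, and their whole span is returned to the page heap.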
3124 
3125 #ifndef WTF_CHANGES
3126 // For use by exported routines below that want specific alignments
3127 //
3128 // Note: this code can be slow, and can significantly fragment memory.
3129 // The expectation is that memalign/posix_memalign/valloc/pvalloc will
3130 // not be invoked very often.  This requirement simplifies our
3131 // implementation and allows us to tune for expected allocation
3132 // patterns.
3133 static void* do_memalign(size_t align, size_t size) {
3134   ASSERT((align & (align - 1)) == 0);
3135   ASSERT(align > 0);
3136   if (pageheap == NULL) TCMalloc_ThreadCache::InitModule();
3137 
3138   // Allocate at least one byte to avoid boundary conditions below
3139   if (size == 0) size = 1;
3140 
3141   if (size <= kMaxSize && align < kPageSize) {
3142     // Search through acceptable size classes looking for one with
3143     // enough alignment.  This depends on the fact that
3144     // InitSizeClasses() currently produces several size classes that
3145     // are aligned at powers of two.  We will waste time and space if
3146     // we miss in the size class array, but that is deemed acceptable
3147     // since memalign() should be used rarely.
3148     size_t cl = SizeClass(size);
3149     while (cl < kNumClasses && ((class_to_size[cl] & (align - 1)) != 0)) {
3150       cl++;
3151     }
3152     if (cl < kNumClasses) {
3153       TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
3154       return CheckedMallocResult(heap->Allocate(class_to_size[cl]));
3155     }
3156   }
3157 
3158   // We will allocate directly from the page heap
3159   SpinLockHolder h(&pageheap_lock);
3160 
3161   if (align <= kPageSize) {
3162     // Any page-level allocation will be fine
3163     // TODO: We could put the rest of this page in the appropriate
3164     // TODO: cache but it does not seem worth it.
3165     Span* span = pageheap->New(pages(size));
3166     return span == NULL ? NULL : SpanToMallocResult(span);
3167   }
3168 
3169   // Allocate extra pages and carve off an aligned portion
3170   const Length alloc = pages(size + align);
3171   Span* span = pageheap->New(alloc);
3172   if (span == NULL) return NULL;
3173 
3174   // Skip starting portion so that we end up aligned
3175   Length skip = 0;
3176   while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) {
3177     skip++;
3178   }
3179   ASSERT(skip < alloc);
3180   if (skip > 0) {
3181     Span* rest = pageheap->Split(span, skip);
3182     pageheap->Delete(span);
3183     span = rest;
3184   }
3185 
3186   // Skip trailing portion that we do not need to return
3187   const Length needed = pages(size);
3188   ASSERT(span->length >= needed);
3189   if (span->length > needed) {
3190     Span* trailer = pageheap->Split(span, needed);
3191     pageheap->Delete(trailer);
3192   }
3193   return SpanToMallocResult(span);
3194 }
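// Worked example (sizes assumed for illustration): do_memalign(64, 100) first
// scans the size classes starting at SizeClass(100) for one whose byte size is
// a multiple of 64 and, if it finds one, allocates from the thread cache;
// do_memalign(8192, 100) with 4K pages instead over-allocates pages(100 + 8192)
// pages from the page heap and splits off the leading and trailing pages so
// that only an aligned span of pages(100) pages is returned.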
3195 #endif
3196 
3197 // Helpers for use by exported routines below:
3198 
3199 #ifndef WTF_CHANGES
3200 static inline void do_malloc_stats() {
3201   PrintStats(1);
3202 }
3203 #endif
3204 
3205 static inline int do_mallopt(int, int) {
3206   return 1;     // Indicates error
3207 }
3208 
3209 #ifdef HAVE_STRUCT_MALLINFO  // mallinfo isn't defined on freebsd, for instance
3210 static inline struct mallinfo do_mallinfo() {
3211   TCMallocStats stats;
3212   ExtractStats(&stats, NULL);
3213 
3214   // Just some of the fields are filled in.
3215   struct mallinfo info;
3216   memset(&info, 0, sizeof(info));
3217 
3218   // Unfortunately, the struct contains "int" fields, so some of the
3219   // size values will be truncated.
3220   info.arena     = static_cast<int>(stats.system_bytes);
3221   info.fsmblks   = static_cast<int>(stats.thread_bytes
3222                                     + stats.central_bytes
3223                                     + stats.transfer_bytes);
3224   info.fordblks  = static_cast<int>(stats.pageheap_bytes);
3225   info.uordblks  = static_cast<int>(stats.system_bytes
3226                                     - stats.thread_bytes
3227                                     - stats.central_bytes
3228                                     - stats.transfer_bytes
3229                                     - stats.pageheap_bytes);
3230 
3231   return info;
3232 }
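// Usage note (illustrative), for platforms that have struct mallinfo:
//   struct mallinfo mi = mallinfo();
//   // mi.arena    ~ bytes obtained from the system
//   // mi.uordblks ~ bytes currently in use by the application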
3233 #endif
3234 
3235 //-------------------------------------------------------------------
3236 // Exported routines
3237 //-------------------------------------------------------------------
3238 
3239 // CAVEAT: The code structure below ensures that MallocHook methods are always
3240 //         called from the stack frame of the invoked allocation function.
3241 //         heap-checker.cc depends on this to start a stack trace from
3242 //         the call to the (de)allocation function.
3243 
3244 #ifndef WTF_CHANGES
3245 extern "C"
3246 #else
3247 #define do_malloc do_malloc<crashOnFailure>
3248 
3249 template <bool crashOnFailure>
3250 void* malloc(size_t);
3251 
3252 void* fastMalloc(size_t size)
3253 {
3254     return malloc<true>(size);
3255 }
3256 
3257 void* tryFastMalloc(size_t size)
3258 {
3259     return malloc<false>(size);
3260 }
3261 
3262 template <bool crashOnFailure>
3263 ALWAYS_INLINE
3264 #endif
3265 void* malloc(size_t size) {
3266   void* result = do_malloc(size);
3267 #ifndef WTF_CHANGES
3268   MallocHook::InvokeNewHook(result, size);
3269 #endif
3270   return result;
3271 }
3272 
3273 #ifndef WTF_CHANGES
3274 extern "C"
3275 #endif
3276 void free(void* ptr) {
3277 #ifndef WTF_CHANGES
3278   MallocHook::InvokeDeleteHook(ptr);
3279 #endif
3280   do_free(ptr);
3281 }
3282 
3283 #ifndef WTF_CHANGES
3284 extern "C"
3285 #else
3286 template <bool crashOnFailure>
3287 void* calloc(size_t, size_t);
3288 
3289 void* fastCalloc(size_t n, size_t elem_size)
3290 {
3291     return calloc<true>(n, elem_size);
3292 }
3293 
3294 void* tryFastCalloc(size_t n, size_t elem_size)
3295 {
3296     return calloc<false>(n, elem_size);
3297 }
3298 
3299 template <bool crashOnFailure>
3300 ALWAYS_INLINE
3301 #endif
3302 void* calloc(size_t n, size_t elem_size) {
3303   const size_t totalBytes = n * elem_size;
3304 
3305   // Protect against overflow
3306   if (n > 1 && elem_size && (totalBytes / elem_size) != n)
3307     return 0;
3308 
3309   void* result = do_malloc(totalBytes);
3310   if (result != NULL) {
3311     memset(result, 0, totalBytes);
3312   }
3313 #ifndef WTF_CHANGES
3314   MallocHook::InvokeNewHook(result, totalBytes);
3315 #endif
3316   return result;
3317 }
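// Overflow example (assuming a 32-bit size_t for illustration): for
// calloc(0x10000, 0x10001) the product 0x100010000 wraps to 0x10000, and
// 0x10000 / 0x10001 == 0 != n, so the check above rejects the request instead
// of returning an undersized block.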
3318 
3319 // Since cfree isn't used anywhere, we don't compile it in.
3320 #ifndef WTF_CHANGES
3321 #ifndef WTF_CHANGES
3322 extern "C"
3323 #endif
3324 void cfree(void* ptr) {
3325 #ifndef WTF_CHANGES
3326     MallocHook::InvokeDeleteHook(ptr);
3327 #endif
3328   do_free(ptr);
3329 }
3330 #endif
3331 
3332 #ifndef WTF_CHANGES
3333 extern "C"
3334 #else
3335 template <bool crashOnFailure>
3336 void* realloc(void*, size_t);
3337 
3338 void* fastRealloc(void* old_ptr, size_t new_size)
3339 {
3340     return realloc<true>(old_ptr, new_size);
3341 }
3342 
3343 void* tryFastRealloc(void* old_ptr, size_t new_size)
3344 {
3345     return realloc<false>(old_ptr, new_size);
3346 }
3347 
3348 template <bool crashOnFailure>
3349 ALWAYS_INLINE
3350 #endif
3351 void* realloc(void* old_ptr, size_t new_size) {
3352   if (old_ptr == NULL) {
3353     void* result = do_malloc(new_size);
3354 #ifndef WTF_CHANGES
3355     MallocHook::InvokeNewHook(result, new_size);
3356 #endif
3357     return result;
3358   }
3359   if (new_size == 0) {
3360 #ifndef WTF_CHANGES
3361     MallocHook::InvokeDeleteHook(old_ptr);
3362 #endif
3363     free(old_ptr);
3364     return NULL;
3365   }
3366 
3367   // Get the size of the old entry
3368   const PageID p = reinterpret_cast<uintptr_t>(old_ptr) >> kPageShift;
3369   size_t cl = pageheap->GetSizeClassIfCached(p);
3370   Span *span = NULL;
3371   size_t old_size;
3372   if (cl == 0) {
3373     span = pageheap->GetDescriptor(p);
3374     cl = span->sizeclass;
3375     pageheap->CacheSizeClass(p, cl);
3376   }
3377   if (cl != 0) {
3378     old_size = ByteSizeForClass(cl);
3379   } else {
3380     ASSERT(span != NULL);
3381     old_size = span->length << kPageShift;
3382   }
3383 
3384   // Reallocate if the new size is larger than the old size,
3385   // or if the new size is significantly smaller than the old size.
3386   if ((new_size > old_size) || (AllocationSize(new_size) < old_size)) {
3387     // Need to reallocate
3388     void* new_ptr = do_malloc(new_size);
3389     if (new_ptr == NULL) {
3390       return NULL;
3391     }
3392 #ifndef WTF_CHANGES
3393     MallocHook::InvokeNewHook(new_ptr, new_size);
3394 #endif
3395     memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size));
3396 #ifndef WTF_CHANGES
3397     MallocHook::InvokeDeleteHook(old_ptr);
3398 #endif
3399     // We could use a variant of do_free() that leverages the fact
3400     // that we already know the sizeclass of old_ptr.  The benefit
3401     // would be small, so don't bother.
3402     do_free(old_ptr);
3403     return new_ptr;
3404   } else {
3405     return old_ptr;
3406   }
3407 }
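// Resizing policy in practice: growing always copies into a fresh allocation;
// shrinking keeps old_ptr unless the smaller request would fit in a strictly
// smaller allocation size (AllocationSize(new_size) < old_size), so trimming
// an allocation by a few bytes typically returns the original pointer
// unchanged.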
3408 
3409 #ifdef WTF_CHANGES
3410 #undef do_malloc
3411 #else
3412 
3413 static SpinLock set_new_handler_lock = SPINLOCK_INITIALIZER;
3414 
3415 static inline void* cpp_alloc(size_t size, bool nothrow) {
3416   for (;;) {
3417     void* p = do_malloc(size);
3418 #ifdef PREANSINEW
3419     return p;
3420 #else
3421     if (p == NULL) {  // allocation failed
3422       // Get the current new handler.  NB: this function is not
3423       // thread-safe.  We make a feeble stab at making it so here, but
3424       // this lock only protects against tcmalloc interfering with
3425       // itself, not with other libraries calling set_new_handler.
3426       std::new_handler nh;
3427       {
3428         SpinLockHolder h(&set_new_handler_lock);
3429         nh = std::set_new_handler(0);
3430         (void) std::set_new_handler(nh);
3431       }
3432       // If no new_handler is established, the allocation failed.
3433       if (!nh) {
3434         if (nothrow) return 0;
3435         throw std::bad_alloc();
3436       }
3437       // Otherwise, try the new_handler.  If it returns, retry the
3438       // allocation.  If it throws std::bad_alloc, fail the allocation.
3439       // If it throws something else, don't interfere.
3440       try {
3441         (*nh)();
3442       } catch (const std::bad_alloc&) {
3443         if (!nothrow) throw;
3444         return p;
3445       }
3446     } else {  // allocation success
3447       return p;
3448     }
3449 #endif
3450   }
3451 }
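// Interaction with std::set_new_handler (illustrative, application-side code;
// ReclaimCaches is a hypothetical handler name):
//   void ReclaimCaches() { /* release some memory, or throw std::bad_alloc */ }
//   std::set_new_handler(ReclaimCaches);
// On failure cpp_alloc re-reads the current handler, calls it, and retries the
// allocation; with no handler installed it throws std::bad_alloc (or returns
// 0 for the nothrow variants), matching the loop above.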
3452 
3453 void* operator new(size_t size) {
3454   void* p = cpp_alloc(size, false);
3455   // We keep this next instruction out of cpp_alloc for a reason: when
3456   // it's in, and new just calls cpp_alloc, the optimizer may fold the
3457   // new call into cpp_alloc, which messes up our whole section-based
3458   // stacktracing (see ATTRIBUTE_SECTION, above).  This ensures cpp_alloc
3459   // isn't the last thing this fn calls, and prevents the folding.
3460   MallocHook::InvokeNewHook(p, size);
3461   return p;
3462 }
3463 
3464 void* operator new(size_t size, const std::nothrow_t&) __THROW {
3465   void* p = cpp_alloc(size, true);
3466   MallocHook::InvokeNewHook(p, size);
3467   return p;
3468 }
3469 
3470 void operator delete(void* p) __THROW {
3471   MallocHook::InvokeDeleteHook(p);
3472   do_free(p);
3473 }
3474 
3475 void operator delete(void* p, const std::nothrow_t&) __THROW {
3476   MallocHook::InvokeDeleteHook(p);
3477   do_free(p);
3478 }
3479 
3480 void* operator new[](size_t size) {
3481   void* p = cpp_alloc(size, false);
3482   // We keep this next instruction out of cpp_alloc for a reason: when
3483   // it's in, and new just calls cpp_alloc, the optimizer may fold the
3484   // new call into cpp_alloc, which messes up our whole section-based
3485   // stacktracing (see ATTRIBUTE_SECTION, above).  This ensures cpp_alloc
3486   // isn't the last thing this fn calls, and prevents the folding.
3487   MallocHook::InvokeNewHook(p, size);
3488   return p;
3489 }
3490 
3491 void* operator new[](size_t size, const std::nothrow_t&) __THROW {
3492   void* p = cpp_alloc(size, true);
3493   MallocHook::InvokeNewHook(p, size);
3494   return p;
3495 }
3496 
3497 void operator delete[](void* p) __THROW {
3498   MallocHook::InvokeDeleteHook(p);
3499   do_free(p);
3500 }
3501 
3502 void operator delete[](void* p, const std::nothrow_t&) __THROW {
3503   MallocHook::InvokeDeleteHook(p);
3504   do_free(p);
3505 }
3506 
3507 extern "C" void* memalign(size_t align, size_t size) __THROW {
3508   void* result = do_memalign(align, size);
3509   MallocHook::InvokeNewHook(result, size);
3510   return result;
3511 }
3512 
3513 extern "C" int posix_memalign(void** result_ptr, size_t align, size_t size)
3514     __THROW {
3515   if (((align % sizeof(void*)) != 0) ||
3516       ((align & (align - 1)) != 0) ||
3517       (align == 0)) {
3518     return EINVAL;
3519   }
3520 
3521   void* result = do_memalign(align, size);
3522   MallocHook::InvokeNewHook(result, size);
3523   if (result == NULL) {
3524     return ENOMEM;
3525   } else {
3526     *result_ptr = result;
3527     return 0;
3528   }
3529 }
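// Usage example (illustrative; not part of this file's build).  The alignment
// must be a power of two and a multiple of sizeof(void*), as checked above:
//   void* p = NULL;
//   if (posix_memalign(&p, 64, 1024) == 0) {
//       /* ... use the 64-byte-aligned block ... */
//       free(p);
//   }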
3530 
3531 static size_t pagesize = 0;
3532 
3533 extern "C" void* valloc(size_t size) __THROW {
3534   // Allocate page-aligned object of length >= size bytes
3535   if (pagesize == 0) pagesize = getpagesize();
3536   void* result = do_memalign(pagesize, size);
3537   MallocHook::InvokeNewHook(result, size);
3538   return result;
3539 }
3540 
3541 extern "C" void* pvalloc(size_t size) __THROW {
3542   // Round up size to a multiple of pagesize
3543   if (pagesize == 0) pagesize = getpagesize();
3544   size = (size + pagesize - 1) & ~(pagesize - 1);
3545   void* result = do_memalign(pagesize, size);
3546   MallocHook::InvokeNewHook(result, size);
3547   return result;
3548 }
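// Note: with a 4K page size (assumed for illustration), valloc(100) returns a
// page-aligned block of at least 100 bytes, whereas pvalloc(100) first rounds
// the request up to 4096 bytes and therefore hands back at least a whole page.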
3549 
3550 extern "C" void malloc_stats(void) {
3551   do_malloc_stats();
3552 }
3553 
3554 extern "C" int mallopt(int cmd, int value) {
3555   return do_mallopt(cmd, value);
3556 }
3557 
3558 #ifdef HAVE_STRUCT_MALLINFO
3559 extern "C" struct mallinfo mallinfo(void) {
3560   return do_mallinfo();
3561 }
3562 #endif
3563 
3564 //-------------------------------------------------------------------
3565 // Some library routines on RedHat 9 allocate memory using malloc()
3566 // and free it using __libc_free() (or vice-versa).  Since we provide
3567 // our own implementations of malloc/free, we need to make sure that
3568 // the __libc_XXX variants (defined as part of glibc) also point to
3569 // the same implementations.
3570 //-------------------------------------------------------------------
3571 
3572 #if defined(__GLIBC__)
3573 extern "C" {
3574 # if defined(__GNUC__) && !defined(__MACH__) && defined(HAVE___ATTRIBUTE__)
3575   // Potentially faster variants that use the gcc alias extension.
3576   // Mach-O (Darwin) does not support weak aliases, hence the __MACH__ check.
3577 # define ALIAS(x) __attribute__ ((weak, alias (x)))
3578   void* __libc_malloc(size_t size)              ALIAS("malloc");
3579   void  __libc_free(void* ptr)                  ALIAS("free");
3580   void* __libc_realloc(void* ptr, size_t size)  ALIAS("realloc");
3581   void* __libc_calloc(size_t n, size_t size)    ALIAS("calloc");
3582   void  __libc_cfree(void* ptr)                 ALIAS("cfree");
3583   void* __libc_memalign(size_t align, size_t s) ALIAS("memalign");
3584   void* __libc_valloc(size_t size)              ALIAS("valloc");
3585   void* __libc_pvalloc(size_t size)             ALIAS("pvalloc");
3586   int __posix_memalign(void** r, size_t a, size_t s) ALIAS("posix_memalign");
3587 # undef ALIAS
3588 # else   /* not __GNUC__ */
3589   // Portable wrappers
3590   void* __libc_malloc(size_t size)              { return malloc(size);       }
3591   void  __libc_free(void* ptr)                  { free(ptr);                 }
3592   void* __libc_realloc(void* ptr, size_t size)  { return realloc(ptr, size); }
3593   void* __libc_calloc(size_t n, size_t size)    { return calloc(n, size);    }
3594   void  __libc_cfree(void* ptr)                 { cfree(ptr);                }
3595   void* __libc_memalign(size_t align, size_t s) { return memalign(align, s); }
3596   void* __libc_valloc(size_t size)              { return valloc(size);       }
3597   void* __libc_pvalloc(size_t size)             { return pvalloc(size);      }
3598   int __posix_memalign(void** r, size_t a, size_t s) {
3599     return posix_memalign(r, a, s);
3600   }
3601 # endif  /* __GNUC__ */
3602 }
3603 #endif   /* __GLIBC__ */
3604 
3605 // Override __libc_memalign in libc on Linux boxes specially.
3606 // They have a bug in libc that causes them to (very rarely) allocate
3607 // with __libc_memalign() yet deallocate with free(), and the
3608 // definitions above don't catch it.
3609 // This function is an exception to the rule of calling MallocHook method
3610 // from the stack frame of the allocation function;
3611 // heap-checker handles this special case explicitly.
3612 static void *MemalignOverride(size_t align, size_t size, const void *caller)
3613     __THROW {
3614   void* result = do_memalign(align, size);
3615   MallocHook::InvokeNewHook(result, size);
3616   return result;
3617 }
3618 void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride;
3619 
3620 #endif
3621 
3622 #if defined(WTF_CHANGES) && PLATFORM(DARWIN)
3623 
3624 class FreeObjectFinder {
3625     const RemoteMemoryReader& m_reader;
3626     HashSet<void*> m_freeObjects;
3627 
3628 public:
3629     FreeObjectFinder(const RemoteMemoryReader& reader) : m_reader(reader) { }
3630 
3631     void visit(void* ptr) { m_freeObjects.add(ptr); }
3632     bool isFreeObject(void* ptr) const { return m_freeObjects.contains(ptr); }
3633     size_t freeObjectCount() const { return m_freeObjects.size(); }
3634 
3635     void findFreeObjects(TCMalloc_ThreadCache* threadCache)
3636     {
3637         for (; threadCache; threadCache = (threadCache->next_ ? m_reader(threadCache->next_) : 0))
3638             threadCache->enumerateFreeObjects(*this, m_reader);
3639     }
3640 
3641     void findFreeObjects(TCMalloc_Central_FreeListPadded* centralFreeList, size_t numSizes, TCMalloc_Central_FreeListPadded* remoteCentralFreeList)
3642     {
3643         for (unsigned i = 0; i < numSizes; i++)
3644             centralFreeList[i].enumerateFreeObjects(*this, m_reader, remoteCentralFreeList + i);
3645     }
3646 };
3647 
3648 class PageMapFreeObjectFinder {
3649     const RemoteMemoryReader& m_reader;
3650     FreeObjectFinder& m_freeObjectFinder;
3651 
3652 public:
3653     PageMapFreeObjectFinder(const RemoteMemoryReader& reader, FreeObjectFinder& freeObjectFinder)
3654         : m_reader(reader)
3655         , m_freeObjectFinder(freeObjectFinder)
3656     { }
3657 
3658     int visit(void* ptr) const
3659     {
3660         if (!ptr)
3661             return 1;
3662 
3663         Span* span = m_reader(reinterpret_cast<Span*>(ptr));
3664         if (span->free) {
3665             void* ptr = reinterpret_cast<void*>(span->start << kPageShift);
3666             m_freeObjectFinder.visit(ptr);
3667         } else if (span->sizeclass) {
3668             // Walk the free list of the small-object span, keeping track of each object seen
3669             for (void* nextObject = span->objects; nextObject; nextObject = *m_reader(reinterpret_cast<void**>(nextObject)))
3670                 m_freeObjectFinder.visit(nextObject);
3671         }
3672         return span->length;
3673     }
3674 };
3675 
3676 class PageMapMemoryUsageRecorder {
3677     task_t m_task;
3678     void* m_context;
3679     unsigned m_typeMask;
3680     vm_range_recorder_t* m_recorder;
3681     const RemoteMemoryReader& m_reader;
3682     const FreeObjectFinder& m_freeObjectFinder;
3683     mutable HashSet<void*> m_seenPointers;
3684 
3685 public:
3686     PageMapMemoryUsageRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader, const FreeObjectFinder& freeObjectFinder)
3687         : m_task(task)
3688         , m_context(context)
3689         , m_typeMask(typeMask)
3690         , m_recorder(recorder)
3691         , m_reader(reader)
3692         , m_freeObjectFinder(freeObjectFinder)
3693     { }
3694 
3695     int visit(void* ptr) const
3696     {
3697         if (!ptr)
3698             return 1;
3699 
3700         Span* span = m_reader(reinterpret_cast<Span*>(ptr));
3701         if (m_seenPointers.contains(ptr))
3702             return span->length;
3703         m_seenPointers.add(ptr);
3704 
3705         // Mark the memory used for the Span itself as an administrative region
3706         vm_range_t ptrRange = { reinterpret_cast<vm_address_t>(ptr), sizeof(Span) };
3707         if (m_typeMask & (MALLOC_PTR_REGION_RANGE_TYPE | MALLOC_ADMIN_REGION_RANGE_TYPE))
3708             (*m_recorder)(m_task, m_context, MALLOC_ADMIN_REGION_RANGE_TYPE, &ptrRange, 1);
3709 
3710         ptrRange.address = span->start << kPageShift;
3711         ptrRange.size = span->length * kPageSize;
3712 
3713         // Mark the memory region the span represents as a candidate for containing pointers
3714         if (m_typeMask & (MALLOC_PTR_REGION_RANGE_TYPE | MALLOC_ADMIN_REGION_RANGE_TYPE))
3715             (*m_recorder)(m_task, m_context, MALLOC_PTR_REGION_RANGE_TYPE, &ptrRange, 1);
3716 
3717         if (!span->free && (m_typeMask & MALLOC_PTR_IN_USE_RANGE_TYPE)) {
3718             // If it's an allocated large object span, mark it as in use
3719             if (span->sizeclass == 0 && !m_freeObjectFinder.isFreeObject(reinterpret_cast<void*>(ptrRange.address)))
3720                 (*m_recorder)(m_task, m_context, MALLOC_PTR_IN_USE_RANGE_TYPE, &ptrRange, 1);
3721             else if (span->sizeclass) {
3722                 const size_t byteSize = ByteSizeForClass(span->sizeclass);
3723                 unsigned totalObjects = (span->length << kPageShift) / byteSize;
3724                 ASSERT(span->refcount <= totalObjects);
3725                 char* ptr = reinterpret_cast<char*>(span->start << kPageShift);
3726 
3727                 // Mark each allocated small object within the span as in use
3728                 for (unsigned i = 0; i < totalObjects; i++) {
3729                     char* thisObject = ptr + (i * byteSize);
3730                     if (m_freeObjectFinder.isFreeObject(thisObject))
3731                         continue;
3732 
3733                     vm_range_t objectRange = { reinterpret_cast<vm_address_t>(thisObject), byteSize };
3734                     (*m_recorder)(m_task, m_context, MALLOC_PTR_IN_USE_RANGE_TYPE, &objectRange, 1);
3735                 }
3736             }
3737         }
3738 
3739         return span->length;
3740     }
3741 };
3742 
3743 kern_return_t FastMallocZone::enumerate(task_t task, void* context, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t reader, vm_range_recorder_t recorder)
3744 {
3745     RemoteMemoryReader memoryReader(task, reader);
3746 
3747     InitSizeClasses();
3748 
3749     FastMallocZone* mzone = memoryReader(reinterpret_cast<FastMallocZone*>(zoneAddress));
3750     TCMalloc_PageHeap* pageHeap = memoryReader(mzone->m_pageHeap);
3751     TCMalloc_ThreadCache** threadHeapsPointer = memoryReader(mzone->m_threadHeaps);
3752     TCMalloc_ThreadCache* threadHeaps = memoryReader(*threadHeapsPointer);
3753 
3754     TCMalloc_Central_FreeListPadded* centralCaches = memoryReader(mzone->m_centralCaches, sizeof(TCMalloc_Central_FreeListPadded) * kNumClasses);
3755 
3756     FreeObjectFinder finder(memoryReader);
3757     finder.findFreeObjects(threadHeaps);
3758     finder.findFreeObjects(centralCaches, kNumClasses, mzone->m_centralCaches);
3759 
3760     TCMalloc_PageHeap::PageMap* pageMap = &pageHeap->pagemap_;
3761     PageMapFreeObjectFinder pageMapFinder(memoryReader, finder);
3762     pageMap->visit(pageMapFinder, memoryReader);
3763 
3764     PageMapMemoryUsageRecorder usageRecorder(task, context, typeMask, recorder, memoryReader, finder);
3765     pageMap->visit(usageRecorder, memoryReader);
3766 
3767     return 0;
3768 }
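// Enumeration flow (summary): this introspection entry point reads the remote
// process's zone, page heap, thread heaps and central caches through
// RemoteMemoryReader, first collecting every free object, then walking the
// page map twice -- once to find free objects reachable only via spans, and
// once to report administrative, pointer-containing and in-use ranges to the
// supplied vm_range_recorder_t.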
3769 
3770 size_t FastMallocZone::size(malloc_zone_t*, const void*)
3771 {
3772     return 0;
3773 }
3774 
3775 void* FastMallocZone::zoneMalloc(malloc_zone_t*, size_t)
3776 {
3777     return 0;
3778 }
3779 
3780 void* FastMallocZone::zoneCalloc(malloc_zone_t*, size_t, size_t)
3781 {
3782     return 0;
3783 }
3784 
3785 void FastMallocZone::zoneFree(malloc_zone_t*, void* ptr)
3786 {
3787     // Due to <rdar://problem/5671357> zoneFree may be called by the system free even if the pointer
3788     // is not in this zone.  When this happens, the pointer being freed was not allocated by any
3789     // zone so we need to print a useful error for the application developer.
3790     malloc_printf("*** error for object %p: pointer being freed was not allocated\n", ptr);
3791 }
3792 
3793 void* FastMallocZone::zoneRealloc(malloc_zone_t*, void*, size_t)
3794 {
3795     return 0;
3796 }
3797 
3798 
3799 #undef malloc
3800 #undef free
3801 #undef realloc
3802 #undef calloc
3803 
3804 extern "C" {
3805 malloc_introspection_t jscore_fastmalloc_introspection = { &FastMallocZone::enumerate, &FastMallocZone::goodSize, &FastMallocZone::check, &FastMallocZone::print,
3806     &FastMallocZone::log, &FastMallocZone::forceLock, &FastMallocZone::forceUnlock, &FastMallocZone::statistics };
3807 }
3808 
3809 FastMallocZone::FastMallocZone(TCMalloc_PageHeap* pageHeap, TCMalloc_ThreadCache** threadHeaps, TCMalloc_Central_FreeListPadded* centralCaches)
3810     : m_pageHeap(pageHeap)
3811     , m_threadHeaps(threadHeaps)
3812     , m_centralCaches(centralCaches)
3813 {
3814     memset(&m_zone, 0, sizeof(m_zone));
3815     m_zone.zone_name = "JavaScriptCore FastMalloc";
3816     m_zone.size = &FastMallocZone::size;
3817     m_zone.malloc = &FastMallocZone::zoneMalloc;
3818     m_zone.calloc = &FastMallocZone::zoneCalloc;
3819     m_zone.realloc = &FastMallocZone::zoneRealloc;
3820     m_zone.free = &FastMallocZone::zoneFree;
3821     m_zone.valloc = &FastMallocZone::zoneValloc;
3822     m_zone.destroy = &FastMallocZone::zoneDestroy;
3823     m_zone.introspect = &jscore_fastmalloc_introspection;
3824     malloc_zone_register(&m_zone);
3825 }
3826 
3827 
3828 void FastMallocZone::init()
3829 {
3830     static FastMallocZone zone(pageheap, &thread_heaps, static_cast<TCMalloc_Central_FreeListPadded*>(central_cache));
3831 }
3832 
3833 #endif
3834 
3835 #if WTF_CHANGES
3836 void releaseFastMallocFreeMemory()
3837 {
3838     // Flush free memory held by the current thread cache.  Scavenge()'s
3839     // low-water-mark mechanism prevents a full release on the first pass,
3840     // so calling it twice flushes everything.
3841     if (TCMalloc_ThreadCache* threadCache = TCMalloc_ThreadCache::GetCacheIfPresent()) {
3842         threadCache->Scavenge();
3843         threadCache->Scavenge();
3844     }
3845 
3846     SpinLockHolder h(&pageheap_lock);
3847     pageheap->ReleaseFreePages();
3848 }
3849 
3850 FastMallocStatistics fastMallocStatistics()
3851 {
3852     FastMallocStatistics statistics;
3853     {
3854         SpinLockHolder lockHolder(&pageheap_lock);
3855         statistics.heapSize = static_cast<size_t>(pageheap->SystemBytes());
3856         statistics.freeSizeInHeap = static_cast<size_t>(pageheap->FreeBytes());
3857         statistics.returnedSize = pageheap->ReturnedBytes();
3858         statistics.freeSizeInCaches = 0;
3859         for (TCMalloc_ThreadCache* threadCache = thread_heaps; threadCache ; threadCache = threadCache->next_)
3860             statistics.freeSizeInCaches += threadCache->Size();
3861     }
3862     for (unsigned cl = 0; cl < kNumClasses; ++cl) {
3863         const int length = central_cache[cl].length();
3864         const int tc_length = central_cache[cl].tc_length();
3865         statistics.freeSizeInCaches += ByteSizeForClass(cl) * (length + tc_length);
3866     }
3867     return statistics;
3868 }
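// Usage sketch (illustrative) for WTF clients:
//   WTF::FastMallocStatistics s = WTF::fastMallocStatistics();
//   // s.heapSize         -- pageheap->SystemBytes(): bytes obtained from the system
//   // s.freeSizeInHeap   -- pageheap->FreeBytes(): free bytes held in the page heap
//   // s.freeSizeInCaches -- free bytes in thread caches plus central/transfer caches
//   // s.returnedSize     -- pageheap->ReturnedBytes()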
3869 
3870 } // namespace WTF
3871 #endif
3872 
3873 #endif // FORCE_SYSTEM_MALLOC
3874