1 #define	JEMALLOC_PROF_C_
2 #include "jemalloc/internal/jemalloc_internal.h"
3 /******************************************************************************/
4 
5 #ifdef JEMALLOC_PROF_LIBUNWIND
6 #define	UNW_LOCAL_ONLY
7 #include <libunwind.h>
8 #endif
9 
10 #ifdef JEMALLOC_PROF_LIBGCC
11 #include <unwind.h>
12 #endif
13 
14 /******************************************************************************/
15 /* Data. */
16 
17 malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL)
18 
19 bool		opt_prof = false;
20 bool		opt_prof_active = true;
21 size_t		opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
22 ssize_t		opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
23 bool		opt_prof_gdump = false;
24 bool		opt_prof_final = true;
25 bool		opt_prof_leak = false;
26 bool		opt_prof_accum = false;
27 char		opt_prof_prefix[
28     /* Minimize memory bloat for non-prof builds. */
29 #ifdef JEMALLOC_PROF
30     PATH_MAX +
31 #endif
32     1];
33 
34 uint64_t	prof_interval = 0;
35 
36 /*
37  * Table of mutexes that are shared among ctx's.  These are leaf locks, so
38  * there is no problem with using them for more than one ctx at the same time.
39  * The primary motivation for this sharing though is that ctx's are ephemeral,
40  * and destroying mutexes causes complications for systems that allocate when
41  * creating/destroying mutexes.
42  */
43 static malloc_mutex_t	*ctx_locks;
44 static unsigned		cum_ctxs; /* Atomic counter. */
45 
46 /*
47  * Global hash of (prof_bt_t *)-->(prof_ctx_t *).  This is the master data
48  * structure that knows about all backtraces currently captured.
49  */
50 static ckh_t		bt2ctx;
51 static malloc_mutex_t	bt2ctx_mtx;
52 
53 static malloc_mutex_t	prof_dump_seq_mtx;
54 static uint64_t		prof_dump_seq;
55 static uint64_t		prof_dump_iseq;
56 static uint64_t		prof_dump_mseq;
57 static uint64_t		prof_dump_useq;
58 
59 /*
60  * This buffer is rather large for stack allocation, so use a single buffer for
61  * all profile dumps.
62  */
63 static malloc_mutex_t	prof_dump_mtx;
64 static char		prof_dump_buf[
65     /* Minimize memory bloat for non-prof builds. */
66 #ifdef JEMALLOC_PROF
67     PROF_DUMP_BUFSIZE
68 #else
69     1
70 #endif
71 ];
72 static unsigned		prof_dump_buf_end;
73 static int		prof_dump_fd;
74 
75 /* Do not dump any profiles until bootstrapping is complete. */
76 static bool		prof_booted = false;
77 
78 /******************************************************************************/
79 
80 void
81 bt_init(prof_bt_t *bt, void **vec)
82 {
83 
84 	cassert(config_prof);
85 
86 	bt->vec = vec;
87 	bt->len = 0;
88 }
89 
90 static void
91 bt_destroy(prof_bt_t *bt)
92 {
93 
94 	cassert(config_prof);
95 
96 	idalloc(bt);
97 }
98 
99 static prof_bt_t *
100 bt_dup(prof_bt_t *bt)
101 {
102 	prof_bt_t *ret;
103 
104 	cassert(config_prof);
105 
106 	/*
107 	 * Create a single allocation that has space for vec immediately
108 	 * following the prof_bt_t structure.  The backtraces that get
109 	 * stored in the backtrace caches are copied from stack-allocated
110 	 * temporary variables, so size is known at creation time.  Making this
111 	 * a contiguous object improves cache locality.
112 	 */
113 	ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) +
114 	    (bt->len * sizeof(void *)));
115 	if (ret == NULL)
116 		return (NULL);
117 	ret->vec = (void **)((uintptr_t)ret +
118 	    QUANTUM_CEILING(sizeof(prof_bt_t)));
119 	memcpy(ret->vec, bt->vec, bt->len * sizeof(void *));
120 	ret->len = bt->len;
121 
122 	return (ret);
123 }
124 
125 static inline void
126 prof_enter(prof_tdata_t *prof_tdata)
127 {
128 
129 	cassert(config_prof);
130 
131 	assert(prof_tdata->enq == false);
132 	prof_tdata->enq = true;
133 
134 	malloc_mutex_lock(&bt2ctx_mtx);
135 }
136 
137 static inline void
138 prof_leave(prof_tdata_t *prof_tdata)
139 {
140 	bool idump, gdump;
141 
142 	cassert(config_prof);
143 
144 	malloc_mutex_unlock(&bt2ctx_mtx);
145 
146 	assert(prof_tdata->enq);
147 	prof_tdata->enq = false;
148 	idump = prof_tdata->enq_idump;
149 	prof_tdata->enq_idump = false;
150 	gdump = prof_tdata->enq_gdump;
151 	prof_tdata->enq_gdump = false;
152 
153 	if (idump)
154 		prof_idump();
155 	if (gdump)
156 		prof_gdump();
157 }
158 
159 #ifdef JEMALLOC_PROF_LIBUNWIND
160 void
161 prof_backtrace(prof_bt_t *bt)
162 {
163 	int nframes;
164 
165 	cassert(config_prof);
166 	assert(bt->len == 0);
167 	assert(bt->vec != NULL);
168 
169 	nframes = unw_backtrace(bt->vec, PROF_BT_MAX);
170 	if (nframes <= 0)
171 		return;
172 	bt->len = nframes;
173 }
174 #elif (defined(JEMALLOC_PROF_LIBGCC))
175 static _Unwind_Reason_Code
176 prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
177 {
178 
179 	cassert(config_prof);
180 
181 	return (_URC_NO_REASON);
182 }
183 
184 static _Unwind_Reason_Code
185 prof_unwind_callback(struct _Unwind_Context *context, void *arg)
186 {
187 	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
188 	void *ip;
189 
190 	cassert(config_prof);
191 
192 	ip = (void *)_Unwind_GetIP(context);
193 	if (ip == NULL)
194 		return (_URC_END_OF_STACK);
195 	data->bt->vec[data->bt->len] = ip;
196 	data->bt->len++;
197 	if (data->bt->len == data->max)
198 		return (_URC_END_OF_STACK);
199 
200 	return (_URC_NO_REASON);
201 }
202 
203 void
204 prof_backtrace(prof_bt_t *bt)
205 {
206 	prof_unwind_data_t data = {bt, PROF_BT_MAX};
207 
208 	cassert(config_prof);
209 
210 	_Unwind_Backtrace(prof_unwind_callback, &data);
211 }
212 #elif (defined(JEMALLOC_PROF_GCC))
213 void
214 prof_backtrace(prof_bt_t *bt)
215 {
216 #define	BT_FRAME(i)							\
217 	if ((i) < PROF_BT_MAX) {					\
218 		void *p;						\
219 		if (__builtin_frame_address(i) == 0)			\
220 			return;						\
221 		p = __builtin_return_address(i);			\
222 		if (p == NULL)						\
223 			return;						\
224 		bt->vec[(i)] = p;					\
225 		bt->len = (i) + 1;					\
226 	} else								\
227 		return;
228 
229 	cassert(config_prof);
230 
231 	BT_FRAME(0)
232 	BT_FRAME(1)
233 	BT_FRAME(2)
234 	BT_FRAME(3)
235 	BT_FRAME(4)
236 	BT_FRAME(5)
237 	BT_FRAME(6)
238 	BT_FRAME(7)
239 	BT_FRAME(8)
240 	BT_FRAME(9)
241 
242 	BT_FRAME(10)
243 	BT_FRAME(11)
244 	BT_FRAME(12)
245 	BT_FRAME(13)
246 	BT_FRAME(14)
247 	BT_FRAME(15)
248 	BT_FRAME(16)
249 	BT_FRAME(17)
250 	BT_FRAME(18)
251 	BT_FRAME(19)
252 
253 	BT_FRAME(20)
254 	BT_FRAME(21)
255 	BT_FRAME(22)
256 	BT_FRAME(23)
257 	BT_FRAME(24)
258 	BT_FRAME(25)
259 	BT_FRAME(26)
260 	BT_FRAME(27)
261 	BT_FRAME(28)
262 	BT_FRAME(29)
263 
264 	BT_FRAME(30)
265 	BT_FRAME(31)
266 	BT_FRAME(32)
267 	BT_FRAME(33)
268 	BT_FRAME(34)
269 	BT_FRAME(35)
270 	BT_FRAME(36)
271 	BT_FRAME(37)
272 	BT_FRAME(38)
273 	BT_FRAME(39)
274 
275 	BT_FRAME(40)
276 	BT_FRAME(41)
277 	BT_FRAME(42)
278 	BT_FRAME(43)
279 	BT_FRAME(44)
280 	BT_FRAME(45)
281 	BT_FRAME(46)
282 	BT_FRAME(47)
283 	BT_FRAME(48)
284 	BT_FRAME(49)
285 
286 	BT_FRAME(50)
287 	BT_FRAME(51)
288 	BT_FRAME(52)
289 	BT_FRAME(53)
290 	BT_FRAME(54)
291 	BT_FRAME(55)
292 	BT_FRAME(56)
293 	BT_FRAME(57)
294 	BT_FRAME(58)
295 	BT_FRAME(59)
296 
297 	BT_FRAME(60)
298 	BT_FRAME(61)
299 	BT_FRAME(62)
300 	BT_FRAME(63)
301 	BT_FRAME(64)
302 	BT_FRAME(65)
303 	BT_FRAME(66)
304 	BT_FRAME(67)
305 	BT_FRAME(68)
306 	BT_FRAME(69)
307 
308 	BT_FRAME(70)
309 	BT_FRAME(71)
310 	BT_FRAME(72)
311 	BT_FRAME(73)
312 	BT_FRAME(74)
313 	BT_FRAME(75)
314 	BT_FRAME(76)
315 	BT_FRAME(77)
316 	BT_FRAME(78)
317 	BT_FRAME(79)
318 
319 	BT_FRAME(80)
320 	BT_FRAME(81)
321 	BT_FRAME(82)
322 	BT_FRAME(83)
323 	BT_FRAME(84)
324 	BT_FRAME(85)
325 	BT_FRAME(86)
326 	BT_FRAME(87)
327 	BT_FRAME(88)
328 	BT_FRAME(89)
329 
330 	BT_FRAME(90)
331 	BT_FRAME(91)
332 	BT_FRAME(92)
333 	BT_FRAME(93)
334 	BT_FRAME(94)
335 	BT_FRAME(95)
336 	BT_FRAME(96)
337 	BT_FRAME(97)
338 	BT_FRAME(98)
339 	BT_FRAME(99)
340 
341 	BT_FRAME(100)
342 	BT_FRAME(101)
343 	BT_FRAME(102)
344 	BT_FRAME(103)
345 	BT_FRAME(104)
346 	BT_FRAME(105)
347 	BT_FRAME(106)
348 	BT_FRAME(107)
349 	BT_FRAME(108)
350 	BT_FRAME(109)
351 
352 	BT_FRAME(110)
353 	BT_FRAME(111)
354 	BT_FRAME(112)
355 	BT_FRAME(113)
356 	BT_FRAME(114)
357 	BT_FRAME(115)
358 	BT_FRAME(116)
359 	BT_FRAME(117)
360 	BT_FRAME(118)
361 	BT_FRAME(119)
362 
363 	BT_FRAME(120)
364 	BT_FRAME(121)
365 	BT_FRAME(122)
366 	BT_FRAME(123)
367 	BT_FRAME(124)
368 	BT_FRAME(125)
369 	BT_FRAME(126)
370 	BT_FRAME(127)
371 #undef BT_FRAME
372 }
373 #else
374 void
375 prof_backtrace(prof_bt_t *bt)
376 {
377 
378 	cassert(config_prof);
379 	not_reached();
380 }
381 #endif
382 
383 static malloc_mutex_t *
384 prof_ctx_mutex_choose(void)
385 {
386 	unsigned nctxs = atomic_add_u(&cum_ctxs, 1);
387 
388 	return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]);
389 }
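/*
 * Editorial note (not part of the original source): ctx locks are striped.
 * Each newly created ctx takes the next slot of the shared table in
 * round-robin fashion, so for example with PROF_NCTX_LOCKS == 1024 the 1025th
 * ctx would share ctx_locks[0] with the 1st.  Sharing is harmless because
 * these are leaf locks (see the comment above ctx_locks).
 */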
390 
391 static void
392 prof_ctx_init(prof_ctx_t *ctx, prof_bt_t *bt)
393 {
394 
395 	ctx->bt = bt;
396 	ctx->lock = prof_ctx_mutex_choose();
397 	/*
398 	 * Set nlimbo to 1, in order to avoid a race condition with
399 	 * prof_ctx_merge()/prof_ctx_destroy().
400 	 */
401 	ctx->nlimbo = 1;
402 	ql_elm_new(ctx, dump_link);
403 	memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t));
404 	ql_new(&ctx->cnts_ql);
405 }
406 
407 static void
408 prof_ctx_destroy(prof_ctx_t *ctx)
409 {
410 	prof_tdata_t *prof_tdata;
411 
412 	cassert(config_prof);
413 
414 	/*
415 	 * Check that ctx is still unused by any thread cache before destroying
416 	 * it.  prof_lookup() increments ctx->nlimbo in order to avoid a race
417 	 * condition with this function, as does prof_ctx_merge() in order to
418 	 * avoid a race between the main body of prof_ctx_merge() and entry
419 	 * into this function.
420 	 */
421 	prof_tdata = prof_tdata_get(false);
422 	assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX);
423 	prof_enter(prof_tdata);
424 	malloc_mutex_lock(ctx->lock);
425 	if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 &&
426 	    ctx->nlimbo == 1) {
427 		assert(ctx->cnt_merged.curbytes == 0);
428 		assert(ctx->cnt_merged.accumobjs == 0);
429 		assert(ctx->cnt_merged.accumbytes == 0);
430 		/* Remove ctx from bt2ctx. */
431 		if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
432 			not_reached();
433 		prof_leave(prof_tdata);
434 		/* Destroy ctx. */
435 		malloc_mutex_unlock(ctx->lock);
436 		bt_destroy(ctx->bt);
437 		idalloc(ctx);
438 	} else {
439 		/*
440 		 * Compensate for increment in prof_ctx_merge() or
441 		 * prof_lookup().
442 		 */
443 		ctx->nlimbo--;
444 		malloc_mutex_unlock(ctx->lock);
445 		prof_leave(prof_tdata);
446 	}
447 }
448 
449 static void
450 prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
451 {
452 	bool destroy;
453 
454 	cassert(config_prof);
455 
456 	/* Merge cnt stats and detach from ctx. */
457 	malloc_mutex_lock(ctx->lock);
458 	ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
459 	ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
460 	ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
461 	ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
462 	ql_remove(&ctx->cnts_ql, cnt, cnts_link);
463 	if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
464 	    ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) {
465 		/*
466 		 * Increment ctx->nlimbo in order to keep another thread from
467 		 * winning the race to destroy ctx while this one has ctx->lock
468 		 * dropped.  Without this, it would be possible for another
469 		 * thread to:
470 		 *
471 		 * 1) Sample an allocation associated with ctx.
472 		 * 2) Deallocate the sampled object.
473 		 * 3) Successfully prof_ctx_destroy(ctx).
474 		 *
475 		 * The result would be that ctx no longer exists by the time
476 		 * this thread accesses it in prof_ctx_destroy().
477 		 */
478 		ctx->nlimbo++;
479 		destroy = true;
480 	} else
481 		destroy = false;
482 	malloc_mutex_unlock(ctx->lock);
483 	if (destroy)
484 		prof_ctx_destroy(ctx);
485 }
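/*
 * Editorial summary (not part of the original source): ctx->nlimbo counts
 * "in flight" references that are not visible via cnts_ql.  It is set to 1 in
 * prof_ctx_init(), incremented in prof_lookup_global() for an existing ctx,
 * incremented above before ctx->lock is dropped with destruction intended,
 * and incremented in prof_dump_ctx_prep() while a ctx sits on the dump list.
 * Each of these references is released in prof_lookup(), prof_ctx_destroy(),
 * or prof_dump_ctx_cleanup_locked(), and prof_ctx_destroy() only frees a ctx
 * whose nlimbo has fallen back to 1.
 */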
486 
487 static bool
488 prof_lookup_global(prof_bt_t *bt, prof_tdata_t *prof_tdata, void **p_btkey,
489     prof_ctx_t **p_ctx, bool *p_new_ctx)
490 {
491 	union {
492 		prof_ctx_t	*p;
493 		void		*v;
494 	} ctx;
495 	union {
496 		prof_bt_t	*p;
497 		void		*v;
498 	} btkey;
499 	bool new_ctx;
500 
501 	prof_enter(prof_tdata);
502 	if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
503 		/* bt has never been seen before.  Insert it. */
504 		ctx.v = imalloc(sizeof(prof_ctx_t));
505 		if (ctx.v == NULL) {
506 			prof_leave(prof_tdata);
507 			return (true);
508 		}
509 		btkey.p = bt_dup(bt);
510 		if (btkey.v == NULL) {
511 			prof_leave(prof_tdata);
512 			idalloc(ctx.v);
513 			return (true);
514 		}
515 		prof_ctx_init(ctx.p, btkey.p);
516 		if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
517 			/* OOM. */
518 			prof_leave(prof_tdata);
519 			idalloc(btkey.v);
520 			idalloc(ctx.v);
521 			return (true);
522 		}
523 		new_ctx = true;
524 	} else {
525 		/*
526 		 * Increment nlimbo, in order to avoid a race condition with
527 		 * prof_ctx_merge()/prof_ctx_destroy().
528 		 */
529 		malloc_mutex_lock(ctx.p->lock);
530 		ctx.p->nlimbo++;
531 		malloc_mutex_unlock(ctx.p->lock);
532 		new_ctx = false;
533 	}
534 	prof_leave(prof_tdata);
535 
536 	*p_btkey = btkey.v;
537 	*p_ctx = ctx.p;
538 	*p_new_ctx = new_ctx;
539 	return (false);
540 }
541 
542 prof_thr_cnt_t *
543 prof_lookup(prof_bt_t *bt)
544 {
545 	union {
546 		prof_thr_cnt_t	*p;
547 		void		*v;
548 	} ret;
549 	prof_tdata_t *prof_tdata;
550 
551 	cassert(config_prof);
552 
553 	prof_tdata = prof_tdata_get(false);
554 	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
555 		return (NULL);
556 
557 	if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
558 		void *btkey;
559 		prof_ctx_t *ctx;
560 		bool new_ctx;
561 
562 		/*
563 		 * This thread's cache lacks bt.  Look for it in the global
564 		 * cache.
565 		 */
566 		if (prof_lookup_global(bt, prof_tdata, &btkey, &ctx, &new_ctx))
567 			return (NULL);
568 
569 		/* Link a prof_thr_cnt_t into ctx for this thread. */
570 		if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) {
571 			assert(ckh_count(&prof_tdata->bt2cnt) > 0);
572 			/*
573 			 * Flush the least recently used cnt in order to keep
574 			 * bt2cnt from becoming too large.
575 			 */
576 			ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
577 			assert(ret.v != NULL);
578 			if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
579 			    NULL, NULL))
580 				not_reached();
581 			ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
582 			prof_ctx_merge(ret.p->ctx, ret.p);
583 			/* ret can now be re-used. */
584 		} else {
585 			assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX);
586 			/* Allocate and partially initialize a new cnt. */
587 			ret.v = imalloc(sizeof(prof_thr_cnt_t));
588 			if (ret.p == NULL) {
589 				if (new_ctx)
590 					prof_ctx_destroy(ctx);
591 				return (NULL);
592 			}
593 			ql_elm_new(ret.p, cnts_link);
594 			ql_elm_new(ret.p, lru_link);
595 		}
596 		/* Finish initializing ret. */
597 		ret.p->ctx = ctx;
598 		ret.p->epoch = 0;
599 		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
600 		if (ckh_insert(&prof_tdata->bt2cnt, btkey, ret.v)) {
601 			if (new_ctx)
602 				prof_ctx_destroy(ctx);
603 			idalloc(ret.v);
604 			return (NULL);
605 		}
606 		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
607 		malloc_mutex_lock(ctx->lock);
608 		ql_tail_insert(&ctx->cnts_ql, ret.p, cnts_link);
609 		ctx->nlimbo--;
610 		malloc_mutex_unlock(ctx->lock);
611 	} else {
612 		/* Move ret to the front of the LRU. */
613 		ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
614 		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
615 	}
616 
617 	return (ret.p);
618 }
619 
620 
621 void
622 prof_sample_threshold_update(prof_tdata_t *prof_tdata)
623 {
624 	/*
625 	 * The body of this function is compiled out unless heap profiling is
626 	 * enabled, so that it is possible to compile jemalloc with floating
627 	 * point support completely disabled.  Avoiding floating point code is
628 	 * important on memory-constrained systems, but it also enables a
629 	 * workaround for versions of glibc that don't properly save/restore
630 	 * floating point registers during dynamic lazy symbol loading (which
631 	 * internally calls into whatever malloc implementation happens to be
632 	 * integrated into the application).  Note that some compilers (e.g.
633 	 * gcc 4.8) may use floating point registers for fast memory moves, so
634 	 * jemalloc must be compiled with such optimizations disabled (e.g.
635 	 * -mno-sse) in order for the workaround to be complete.
636 	 */
637 #ifdef JEMALLOC_PROF
638 	uint64_t r;
639 	double u;
640 
641 	if (!config_prof)
642 		return;
643 
644 	if (prof_tdata == NULL)
645 		prof_tdata = prof_tdata_get(false);
646 
647 	if (opt_lg_prof_sample == 0) {
648 		prof_tdata->bytes_until_sample = 0;
649 		return;
650 	}
651 
652 	/*
653 	 * Compute sample threshold as a geometrically distributed random
654 	 * variable with mean (2^opt_lg_prof_sample).
655 	 *
656 	 *                         __        __
657 	 *                         |  log(u)  |                     1
658 	 * prof_tdata->threshold = | -------- |, where p = -------------------
659 	 *                         | log(1-p) |             opt_lg_prof_sample
660 	 *                                                 2
661 	 *
662 	 * For more information on the math, see:
663 	 *
664 	 *   Non-Uniform Random Variate Generation
665 	 *   Luc Devroye
666 	 *   Springer-Verlag, New York, 1986
667 	 *   pp 500
668 	 *   (http://luc.devroye.org/rnbookindex.html)
669 	 */
670 	prng64(r, 53, prof_tdata->prng_state,
671 	    UINT64_C(6364136223846793005), UINT64_C(1442695040888963407));
672 	u = (double)r * (1.0/9007199254740992.0L);
673 	prof_tdata->bytes_until_sample = (uint64_t)(log(u) /
674 	    log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
675 	    + (uint64_t)1U;
676 #endif
677 }
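/*
 * Editorial worked example (not part of the original source): with
 * opt_lg_prof_sample == 19 (a 512 KiB mean sample interval) and a uniform
 * draw of u == 0.5, the expression above yields
 *
 *   bytes_until_sample = floor(log(0.5) / log(1 - 2^-19)) + 1
 *                      ~= ln(2) * 2^19 ~= 363409,
 *
 * i.e. roughly ln(2) times the mean, as expected for the median of a
 * geometric distribution.
 */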
678 
679 
680 #ifdef JEMALLOC_JET
681 size_t
682 prof_bt_count(void)
683 {
684 	size_t bt_count;
685 	prof_tdata_t *prof_tdata;
686 
687 	prof_tdata = prof_tdata_get(false);
688 	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
689 		return (0);
690 
691 	prof_enter(prof_tdata);
692 	bt_count = ckh_count(&bt2ctx);
693 	prof_leave(prof_tdata);
694 
695 	return (bt_count);
696 }
697 #endif
698 
699 #ifdef JEMALLOC_JET
700 #undef prof_dump_open
701 #define	prof_dump_open JEMALLOC_N(prof_dump_open_impl)
702 #endif
703 static int
704 prof_dump_open(bool propagate_err, const char *filename)
705 {
706 	int fd;
707 
708 	fd = creat(filename, 0644);
709 	if (fd == -1 && propagate_err == false) {
710 		malloc_printf("<jemalloc>: creat(\"%s\", 0644) failed\n",
711 		    filename);
712 		if (opt_abort)
713 			abort();
714 	}
715 
716 	return (fd);
717 }
718 #ifdef JEMALLOC_JET
719 #undef prof_dump_open
720 #define	prof_dump_open JEMALLOC_N(prof_dump_open)
721 prof_dump_open_t *prof_dump_open = JEMALLOC_N(prof_dump_open_impl);
722 #endif
723 
724 static bool
725 prof_dump_flush(bool propagate_err)
726 {
727 	bool ret = false;
728 	ssize_t err;
729 
730 	cassert(config_prof);
731 
732 	err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
733 	if (err == -1) {
734 		if (propagate_err == false) {
735 			malloc_write("<jemalloc>: write() failed during heap "
736 			    "profile flush\n");
737 			if (opt_abort)
738 				abort();
739 		}
740 		ret = true;
741 	}
742 	prof_dump_buf_end = 0;
743 
744 	return (ret);
745 }
746 
747 static bool
748 prof_dump_close(bool propagate_err)
749 {
750 	bool ret;
751 
752 	assert(prof_dump_fd != -1);
753 	ret = prof_dump_flush(propagate_err);
754 	close(prof_dump_fd);
755 	prof_dump_fd = -1;
756 
757 	return (ret);
758 }
759 
760 static bool
761 prof_dump_write(bool propagate_err, const char *s)
762 {
763 	unsigned i, slen, n;
764 
765 	cassert(config_prof);
766 
767 	i = 0;
768 	slen = strlen(s);
769 	while (i < slen) {
770 		/* Flush the buffer if it is full. */
771 		if (prof_dump_buf_end == PROF_DUMP_BUFSIZE)
772 			if (prof_dump_flush(propagate_err) && propagate_err)
773 				return (true);
774 
775 		if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) {
776 			/* Finish writing. */
777 			n = slen - i;
778 		} else {
779 			/* Write as much of s as will fit. */
780 			n = PROF_DUMP_BUFSIZE - prof_dump_buf_end;
781 		}
782 		memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
783 		prof_dump_buf_end += n;
784 		i += n;
785 	}
786 
787 	return (false);
788 }
789 
790 JEMALLOC_ATTR(format(printf, 2, 3))
791 static bool
792 prof_dump_printf(bool propagate_err, const char *format, ...)
793 {
794 	bool ret;
795 	va_list ap;
796 	char buf[PROF_PRINTF_BUFSIZE];
797 
798 	va_start(ap, format);
799 	malloc_vsnprintf(buf, sizeof(buf), format, ap);
800 	va_end(ap);
801 	ret = prof_dump_write(propagate_err, buf);
802 
803 	return (ret);
804 }
805 
806 static void
807 prof_dump_ctx_prep(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx,
808     prof_ctx_list_t *ctx_ql)
809 {
810 	prof_thr_cnt_t *thr_cnt;
811 	prof_cnt_t tcnt;
812 
813 	cassert(config_prof);
814 
815 	malloc_mutex_lock(ctx->lock);
816 
817 	/*
818 	 * Increment nlimbo so that ctx won't go away before dump.
819 	 * Additionally, link ctx into the dump list so that it is included in
820 	 * prof_dump()'s second pass.
821 	 */
822 	ctx->nlimbo++;
823 	ql_tail_insert(ctx_ql, ctx, dump_link);
824 
825 	memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
826 	ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
827 		volatile unsigned *epoch = &thr_cnt->epoch;
828 
829 		while (true) {
830 			unsigned epoch0 = *epoch;
831 
832 			/* Make sure epoch is even. */
833 			if (epoch0 & 1U)
834 				continue;
835 
836 			memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));
837 
838 			/* Terminate if epoch didn't change while reading. */
839 			if (*epoch == epoch0)
840 				break;
841 		}
842 
843 		ctx->cnt_summed.curobjs += tcnt.curobjs;
844 		ctx->cnt_summed.curbytes += tcnt.curbytes;
845 		if (opt_prof_accum) {
846 			ctx->cnt_summed.accumobjs += tcnt.accumobjs;
847 			ctx->cnt_summed.accumbytes += tcnt.accumbytes;
848 		}
849 	}
850 
851 	if (ctx->cnt_summed.curobjs != 0)
852 		(*leak_nctx)++;
853 
854 	/* Add to cnt_all. */
855 	cnt_all->curobjs += ctx->cnt_summed.curobjs;
856 	cnt_all->curbytes += ctx->cnt_summed.curbytes;
857 	if (opt_prof_accum) {
858 		cnt_all->accumobjs += ctx->cnt_summed.accumobjs;
859 		cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
860 	}
861 
862 	malloc_mutex_unlock(ctx->lock);
863 }
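/*
 * Editorial sketch (not part of the original source): the epoch loop above is
 * the read side of a seqlock-style protocol.  The owning thread is assumed to
 * bracket its counter updates roughly as follows (in the prof inline
 * functions in the header; thr_cnt and usize are illustrative names here),
 * leaving epoch odd only while cnts is inconsistent:
 */
#if 0
	thr_cnt->epoch++;		/* Odd: update in progress. */
	mb_write();			/* Publish epoch before touching cnts. */
	thr_cnt->cnts.curobjs++;	/* ...modify the counters... */
	thr_cnt->cnts.curbytes += usize;
	mb_write();			/* Publish cnts before the final bump. */
	thr_cnt->epoch++;		/* Even again: snapshot is consistent. */
#endif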
864 
865 static bool
866 prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all)
867 {
868 
869 	if (opt_lg_prof_sample == 0) {
870 		if (prof_dump_printf(propagate_err,
871 		    "heap profile: %"PRId64": %"PRId64
872 		    " [%"PRIu64": %"PRIu64"] @ heapprofile\n",
873 		    cnt_all->curobjs, cnt_all->curbytes,
874 		    cnt_all->accumobjs, cnt_all->accumbytes))
875 			return (true);
876 	} else {
877 		if (prof_dump_printf(propagate_err,
878 		    "heap profile: %"PRId64": %"PRId64
879 		    " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n",
880 		    cnt_all->curobjs, cnt_all->curbytes,
881 		    cnt_all->accumobjs, cnt_all->accumbytes,
882 		    ((uint64_t)1U << opt_lg_prof_sample)))
883 			return (true);
884 	}
885 
886 	return (false);
887 }
888 
889 static void
890 prof_dump_ctx_cleanup_locked(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql)
891 {
892 
893 	ctx->nlimbo--;
894 	ql_remove(ctx_ql, ctx, dump_link);
895 }
896 
897 static void
898 prof_dump_ctx_cleanup(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql)
899 {
900 
901 	malloc_mutex_lock(ctx->lock);
902 	prof_dump_ctx_cleanup_locked(ctx, ctx_ql);
903 	malloc_mutex_unlock(ctx->lock);
904 }
905 
906 static bool
907 prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt,
908     prof_ctx_list_t *ctx_ql)
909 {
910 	bool ret;
911 	unsigned i;
912 
913 	cassert(config_prof);
914 
915 	/*
916 	 * Current statistics can sum to 0 as a result of unmerged per thread
917 	 * statistics.  Additionally, interval- and growth-triggered dumps can
918 	 * occur between the time a ctx is created and when its statistics are
919 	 * filled in.  Avoid dumping any ctx that is an artifact of either
920 	 * implementation detail.
921 	 */
922 	malloc_mutex_lock(ctx->lock);
923 	if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) ||
924 	    (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) {
925 		assert(ctx->cnt_summed.curobjs == 0);
926 		assert(ctx->cnt_summed.curbytes == 0);
927 		assert(ctx->cnt_summed.accumobjs == 0);
928 		assert(ctx->cnt_summed.accumbytes == 0);
929 		ret = false;
930 		goto label_return;
931 	}
932 
933 	if (prof_dump_printf(propagate_err, "%"PRId64": %"PRId64
934 	    " [%"PRIu64": %"PRIu64"] @",
935 	    ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes,
936 	    ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) {
937 		ret = true;
938 		goto label_return;
939 	}
940 
941 	for (i = 0; i < bt->len; i++) {
942 		if (prof_dump_printf(propagate_err, " %#"PRIxPTR,
943 		    (uintptr_t)bt->vec[i])) {
944 			ret = true;
945 			goto label_return;
946 		}
947 	}
948 
949 	if (prof_dump_write(propagate_err, "\n")) {
950 		ret = true;
951 		goto label_return;
952 	}
953 
954 	ret = false;
955 label_return:
956 	prof_dump_ctx_cleanup_locked(ctx, ctx_ql);
957 	malloc_mutex_unlock(ctx->lock);
958 	return (ret);
959 }
960 
961 static bool
962 prof_dump_maps(bool propagate_err)
963 {
964 	bool ret;
965 	int mfd;
966 	char filename[PATH_MAX + 1];
967 
968 	cassert(config_prof);
969 #ifdef __FreeBSD__
970 	malloc_snprintf(filename, sizeof(filename), "/proc/curproc/map");
971 #else
972 	malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps",
973 	    (int)getpid());
974 #endif
975 	mfd = open(filename, O_RDONLY);
976 	if (mfd != -1) {
977 		ssize_t nread;
978 
979 		if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
980 		    propagate_err) {
981 			ret = true;
982 			goto label_return;
983 		}
984 		nread = 0;
985 		do {
986 			prof_dump_buf_end += nread;
987 			if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
988 				/* Make space in prof_dump_buf before read(). */
989 				if (prof_dump_flush(propagate_err) &&
990 				    propagate_err) {
991 					ret = true;
992 					goto label_return;
993 				}
994 			}
995 			nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
996 			    PROF_DUMP_BUFSIZE - prof_dump_buf_end);
997 		} while (nread > 0);
998 	} else {
999 		ret = true;
1000 		goto label_return;
1001 	}
1002 
1003 	ret = false;
1004 label_return:
1005 	if (mfd != -1)
1006 		close(mfd);
1007 	return (ret);
1008 }
1009 
1010 static void
1011 prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_nctx,
1012     const char *filename)
1013 {
1014 
1015 	if (cnt_all->curbytes != 0) {
1016 		malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %"
1017 		    PRId64" object%s, %zu context%s\n",
1018 		    cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "",
1019 		    cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "",
1020 		    leak_nctx, (leak_nctx != 1) ? "s" : "");
1021 		malloc_printf(
1022 		    "<jemalloc>: Run pprof on \"%s\" for leak detail\n",
1023 		    filename);
1024 	}
1025 }
1026 
1027 static bool
1028 prof_dump(bool propagate_err, const char *filename, bool leakcheck)
1029 {
1030 	prof_tdata_t *prof_tdata;
1031 	prof_cnt_t cnt_all;
1032 	size_t tabind;
1033 	union {
1034 		prof_ctx_t	*p;
1035 		void		*v;
1036 	} ctx;
1037 	size_t leak_nctx;
1038 	prof_ctx_list_t ctx_ql;
1039 
1040 	cassert(config_prof);
1041 
1042 	prof_tdata = prof_tdata_get(false);
1043 	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
1044 		return (true);
1045 
1046 	malloc_mutex_lock(&prof_dump_mtx);
1047 
1048 	/* Merge per thread profile stats, and sum them in cnt_all. */
1049 	memset(&cnt_all, 0, sizeof(prof_cnt_t));
1050 	leak_nctx = 0;
1051 	ql_new(&ctx_ql);
1052 	prof_enter(prof_tdata);
1053 	for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
1054 		prof_dump_ctx_prep(ctx.p, &cnt_all, &leak_nctx, &ctx_ql);
1055 	prof_leave(prof_tdata);
1056 
1057 	/* Create dump file. */
1058 	if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1)
1059 		goto label_open_close_error;
1060 
1061 	/* Dump profile header. */
1062 	if (prof_dump_header(propagate_err, &cnt_all))
1063 		goto label_write_error;
1064 
1065 	/* Dump per ctx profile stats. */
1066 	while ((ctx.p = ql_first(&ctx_ql)) != NULL) {
1067 		if (prof_dump_ctx(propagate_err, ctx.p, ctx.p->bt, &ctx_ql))
1068 			goto label_write_error;
1069 	}
1070 
1071 	/* Dump /proc/<pid>/maps if possible. */
1072 	if (prof_dump_maps(propagate_err))
1073 		goto label_write_error;
1074 
1075 	if (prof_dump_close(propagate_err))
1076 		goto label_open_close_error;
1077 
1078 	malloc_mutex_unlock(&prof_dump_mtx);
1079 
1080 	if (leakcheck)
1081 		prof_leakcheck(&cnt_all, leak_nctx, filename);
1082 
1083 	return (false);
1084 label_write_error:
1085 	prof_dump_close(propagate_err);
1086 label_open_close_error:
1087 	while ((ctx.p = ql_first(&ctx_ql)) != NULL)
1088 		prof_dump_ctx_cleanup(ctx.p, &ctx_ql);
1089 	malloc_mutex_unlock(&prof_dump_mtx);
1090 	return (true);
1091 }
1092 
1093 #define	DUMP_FILENAME_BUFSIZE	(PATH_MAX + 1)
1094 #define	VSEQ_INVALID		UINT64_C(0xffffffffffffffff)
1095 static void
1096 prof_dump_filename(char *filename, char v, uint64_t vseq)
1097 {
1098 
1099 	cassert(config_prof);
1100 
1101 	if (vseq != VSEQ_INVALID) {
1102 	        /* "<prefix>.<pid>.<seq>.v<vseq>.heap" */
1103 		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
1104 		    "%s.%d.%"PRIu64".%c%"PRIu64".heap",
1105 		    opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq);
1106 	} else {
1107 	        /* "<prefix>.<pid>.<seq>.<v>.heap" */
1108 		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
1109 		    "%s.%d.%"PRIu64".%c.heap",
1110 		    opt_prof_prefix, (int)getpid(), prof_dump_seq, v);
1111 	}
1112 	prof_dump_seq++;
1113 }
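/*
 * Editorial examples (not part of the original source): assuming the default
 * "jeprof" prefix and pid 1234, prof_fdump() (v == 'f', vseq == VSEQ_INVALID)
 * would produce "jeprof.1234.0.f.heap" for the first dump, while an
 * interval-triggered dump with prof_dump_seq == 3 and prof_dump_iseq == 2
 * would be named "jeprof.1234.3.i2.heap".
 */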
1114 
1115 static void
1116 prof_fdump(void)
1117 {
1118 	char filename[DUMP_FILENAME_BUFSIZE];
1119 
1120 	cassert(config_prof);
1121 
1122 	if (prof_booted == false)
1123 		return;
1124 
1125 	if (opt_prof_final && opt_prof_prefix[0] != '\0') {
1126 		malloc_mutex_lock(&prof_dump_seq_mtx);
1127 		prof_dump_filename(filename, 'f', VSEQ_INVALID);
1128 		malloc_mutex_unlock(&prof_dump_seq_mtx);
1129 		prof_dump(false, filename, opt_prof_leak);
1130 	}
1131 }
1132 
1133 void
1134 prof_idump(void)
1135 {
1136 	prof_tdata_t *prof_tdata;
1137 	char filename[PATH_MAX + 1];
1138 
1139 	cassert(config_prof);
1140 
1141 	if (prof_booted == false)
1142 		return;
1143 	prof_tdata = prof_tdata_get(false);
1144 	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
1145 		return;
1146 	if (prof_tdata->enq) {
1147 		prof_tdata->enq_idump = true;
1148 		return;
1149 	}
1150 
1151 	if (opt_prof_prefix[0] != '\0') {
1152 		malloc_mutex_lock(&prof_dump_seq_mtx);
1153 		prof_dump_filename(filename, 'i', prof_dump_iseq);
1154 		prof_dump_iseq++;
1155 		malloc_mutex_unlock(&prof_dump_seq_mtx);
1156 		prof_dump(false, filename, false);
1157 	}
1158 }
1159 
1160 bool
1161 prof_mdump(const char *filename)
1162 {
1163 	char filename_buf[DUMP_FILENAME_BUFSIZE];
1164 
1165 	cassert(config_prof);
1166 
1167 	if (opt_prof == false || prof_booted == false)
1168 		return (true);
1169 
1170 	if (filename == NULL) {
1171 		/* No filename specified, so automatically generate one. */
1172 		if (opt_prof_prefix[0] == '\0')
1173 			return (true);
1174 		malloc_mutex_lock(&prof_dump_seq_mtx);
1175 		prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
1176 		prof_dump_mseq++;
1177 		malloc_mutex_unlock(&prof_dump_seq_mtx);
1178 		filename = filename_buf;
1179 	}
1180 	return (prof_dump(true, filename, false));
1181 }
1182 
1183 void
1184 prof_gdump(void)
1185 {
1186 	prof_tdata_t *prof_tdata;
1187 	char filename[DUMP_FILENAME_BUFSIZE];
1188 
1189 	cassert(config_prof);
1190 
1191 	if (prof_booted == false)
1192 		return;
1193 	prof_tdata = prof_tdata_get(false);
1194 	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
1195 		return;
1196 	if (prof_tdata->enq) {
1197 		prof_tdata->enq_gdump = true;
1198 		return;
1199 	}
1200 
1201 	if (opt_prof_prefix[0] != '\0') {
1202 		malloc_mutex_lock(&prof_dump_seq_mtx);
1203 		prof_dump_filename(filename, 'u', prof_dump_useq);
1204 		prof_dump_useq++;
1205 		malloc_mutex_unlock(&prof_dump_seq_mtx);
1206 		prof_dump(false, filename, false);
1207 	}
1208 }
1209 
1210 static void
1211 prof_bt_hash(const void *key, size_t r_hash[2])
1212 {
1213 	prof_bt_t *bt = (prof_bt_t *)key;
1214 
1215 	cassert(config_prof);
1216 
1217 	hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
1218 }
1219 
1220 static bool
1221 prof_bt_keycomp(const void *k1, const void *k2)
1222 {
1223 	const prof_bt_t *bt1 = (prof_bt_t *)k1;
1224 	const prof_bt_t *bt2 = (prof_bt_t *)k2;
1225 
1226 	cassert(config_prof);
1227 
1228 	if (bt1->len != bt2->len)
1229 		return (false);
1230 	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
1231 }
1232 
1233 prof_tdata_t *
1234 prof_tdata_init(void)
1235 {
1236 	prof_tdata_t *prof_tdata;
1237 
1238 	cassert(config_prof);
1239 
1240 	/* Initialize an empty cache for this thread. */
1241 	prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
1242 	if (prof_tdata == NULL)
1243 		return (NULL);
1244 
1245 	if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
1246 	    prof_bt_hash, prof_bt_keycomp)) {
1247 		idalloc(prof_tdata);
1248 		return (NULL);
1249 	}
1250 	ql_new(&prof_tdata->lru_ql);
1251 
1252 	prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX);
1253 	if (prof_tdata->vec == NULL) {
1254 		ckh_delete(&prof_tdata->bt2cnt);
1255 		idalloc(prof_tdata);
1256 		return (NULL);
1257 	}
1258 
1259 	prof_tdata->prng_state = (uint64_t)(uintptr_t)prof_tdata;
1260 	prof_sample_threshold_update(prof_tdata);
1261 
1262 	prof_tdata->enq = false;
1263 	prof_tdata->enq_idump = false;
1264 	prof_tdata->enq_gdump = false;
1265 
1266 	prof_tdata_tsd_set(&prof_tdata);
1267 
1268 	return (prof_tdata);
1269 }
1270 
1271 void
1272 prof_tdata_cleanup(void *arg)
1273 {
1274 	prof_thr_cnt_t *cnt;
1275 	prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg;
1276 
1277 	cassert(config_prof);
1278 
1279 	if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) {
1280 		/*
1281 		 * Another destructor deallocated memory after this destructor
1282 		 * was called.  Reset prof_tdata to PROF_TDATA_STATE_PURGATORY
1283 		 * in order to receive another callback.
1284 		 */
1285 		prof_tdata = PROF_TDATA_STATE_PURGATORY;
1286 		prof_tdata_tsd_set(&prof_tdata);
1287 	} else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) {
1288 		/*
1289 		 * The previous time this destructor was called, we set the key
1290 		 * to PROF_TDATA_STATE_PURGATORY so that other destructors
1291 		 * wouldn't cause re-creation of the prof_tdata.  This time, do
1292 		 * nothing, so that the destructor will not be called again.
1293 		 */
1294 	} else if (prof_tdata != NULL) {
1295 		/*
1296 		 * Delete the hash table.  All of its contents can still be
1297 		 * iterated over via the LRU.
1298 		 */
1299 		ckh_delete(&prof_tdata->bt2cnt);
1300 		/*
1301 		 * Iteratively merge cnt's into the global stats and delete
1302 		 * them.
1303 		 */
1304 		while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
1305 			ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
1306 			prof_ctx_merge(cnt->ctx, cnt);
1307 			idalloc(cnt);
1308 		}
1309 		idalloc(prof_tdata->vec);
1310 		idalloc(prof_tdata);
1311 		prof_tdata = PROF_TDATA_STATE_PURGATORY;
1312 		prof_tdata_tsd_set(&prof_tdata);
1313 	}
1314 }
1315 
1316 void
1317 prof_boot0(void)
1318 {
1319 
1320 	cassert(config_prof);
1321 
1322 	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
1323 	    sizeof(PROF_PREFIX_DEFAULT));
1324 }
1325 
1326 void
1327 prof_boot1(void)
1328 {
1329 
1330 	cassert(config_prof);
1331 
1332 	/*
1333 	 * opt_prof must be in its final state before any arenas are
1334 	 * initialized, so this function must be executed early.
1335 	 */
1336 
1337 	if (opt_prof_leak && opt_prof == false) {
1338 		/*
1339 		 * Enable opt_prof, but in such a way that profiles are never
1340 		 * automatically dumped.
1341 		 */
1342 		opt_prof = true;
1343 		opt_prof_gdump = false;
1344 	} else if (opt_prof) {
1345 		if (opt_lg_prof_interval >= 0) {
1346 			prof_interval = (((uint64_t)1U) <<
1347 			    opt_lg_prof_interval);
1348 		}
1349 	}
1350 }
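/*
 * Editorial note (not part of the original source): prof_interval is the
 * byte count of allocation activity between interval dumps; for example,
 * opt_lg_prof_interval == 30 requests an automatic prof_idump() roughly every
 * 2^30 bytes (1 GiB) of allocation activity, while a negative value (the
 * default) leaves prof_interval at 0 and disables interval dumps.
 */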
1351 
1352 bool
1353 prof_boot2(void)
1354 {
1355 
1356 	cassert(config_prof);
1357 
1358 	if (opt_prof) {
1359 		unsigned i;
1360 
1361 		if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
1362 		    prof_bt_keycomp))
1363 			return (true);
1364 		if (malloc_mutex_init(&bt2ctx_mtx))
1365 			return (true);
1366 		if (prof_tdata_tsd_boot()) {
1367 			malloc_write(
1368 			    "<jemalloc>: Error in pthread_key_create()\n");
1369 			abort();
1370 		}
1371 
1372 		if (malloc_mutex_init(&prof_dump_seq_mtx))
1373 			return (true);
1374 		if (malloc_mutex_init(&prof_dump_mtx))
1375 			return (true);
1376 
1377 		if (atexit(prof_fdump) != 0) {
1378 			malloc_write("<jemalloc>: Error in atexit()\n");
1379 			if (opt_abort)
1380 				abort();
1381 		}
1382 
1383 		ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS *
1384 		    sizeof(malloc_mutex_t));
1385 		if (ctx_locks == NULL)
1386 			return (true);
1387 		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
1388 			if (malloc_mutex_init(&ctx_locks[i]))
1389 				return (true);
1390 		}
1391 	}
1392 
1393 #ifdef JEMALLOC_PROF_LIBGCC
1394 	/*
1395 	 * Cause the backtracing machinery to allocate its internal state
1396 	 * before enabling profiling.
1397 	 */
1398 	_Unwind_Backtrace(prof_unwind_init_callback, NULL);
1399 #endif
1400 
1401 	prof_booted = true;
1402 
1403 	return (false);
1404 }
1405 
1406 void
1407 prof_prefork(void)
1408 {
1409 
1410 	if (opt_prof) {
1411 		unsigned i;
1412 
1413 		malloc_mutex_prefork(&bt2ctx_mtx);
1414 		malloc_mutex_prefork(&prof_dump_seq_mtx);
1415 		for (i = 0; i < PROF_NCTX_LOCKS; i++)
1416 			malloc_mutex_prefork(&ctx_locks[i]);
1417 	}
1418 }
1419 
1420 void
1421 prof_postfork_parent(void)
1422 {
1423 
1424 	if (opt_prof) {
1425 		unsigned i;
1426 
1427 		for (i = 0; i < PROF_NCTX_LOCKS; i++)
1428 			malloc_mutex_postfork_parent(&ctx_locks[i]);
1429 		malloc_mutex_postfork_parent(&prof_dump_seq_mtx);
1430 		malloc_mutex_postfork_parent(&bt2ctx_mtx);
1431 	}
1432 }
1433 
1434 void
1435 prof_postfork_child(void)
1436 {
1437 
1438 	if (opt_prof) {
1439 		unsigned i;
1440 
1441 		for (i = 0; i < PROF_NCTX_LOCKS; i++)
1442 			malloc_mutex_postfork_child(&ctx_locks[i]);
1443 		malloc_mutex_postfork_child(&prof_dump_seq_mtx);
1444 		malloc_mutex_postfork_child(&bt2ctx_mtx);
1445 	}
1446 }
1447 
1448 /******************************************************************************/
1449