/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_tctx_s prof_tctx_t;
typedef struct prof_gctx_s prof_gctx_t;
typedef struct prof_tdata_s prof_tdata_t;

/* Option defaults. */
#ifdef JEMALLOC_PROF
#  define PROF_PREFIX_DEFAULT		"jeprof"
#else
#  define PROF_PREFIX_DEFAULT		""
#endif
#define	LG_PROF_SAMPLE_DEFAULT		19
#define	LG_PROF_INTERVAL_DEFAULT	-1
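/*
 * Reader's note: LG_PROF_SAMPLE_DEFAULT is the lg of the mean sample
 * interval, i.e. 2^19 bytes (512 KiB) between sampled allocations on
 * average.  An interval setting of -1 leaves interval-triggered dumping
 * (prof_idump()) disabled unless opt.lg_prof_interval is set explicitly.
 */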

/*
 * Hard limit on stack backtrace depth.  The version of prof_backtrace() that
 * is based on __builtin_return_address() necessarily has a hard-coded number
 * of backtrace frame handlers, and should be kept in sync with this setting.
 */
#define	PROF_BT_MAX			128

/* Initial hash table size. */
#define	PROF_CKH_MINITEMS		64

/* Size of memory buffer to use when writing dump files. */
#define	PROF_DUMP_BUFSIZE		65536

/* Size of stack-allocated buffer used by prof_printf(). */
#define	PROF_PRINTF_BUFSIZE		128

/*
 * Number of mutexes shared among all gctx's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NCTX_LOCKS			1024

/*
 * Number of mutexes shared among all tdata's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NTDATA_LOCKS		256

/*
 * prof_tdata pointers close to NULL are used to encode state information that
 * is used for cleaning up during thread shutdown.
 */
#define	PROF_TDATA_STATE_REINCARNATED	((prof_tdata_t *)(uintptr_t)1)
#define	PROF_TDATA_STATE_PURGATORY	((prof_tdata_t *)(uintptr_t)2)
#define	PROF_TDATA_STATE_MAX		PROF_TDATA_STATE_PURGATORY
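/*
 * Illustrative use, mirroring prof_sample_accum_update() later in this file:
 * a prof_tdata_t pointer is only usable if it lies above the sentinel range,
 * so callers collapse the REINCARNATED/PURGATORY values to NULL, e.g.
 *
 *	if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
 *		tdata = NULL;
 */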

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

struct prof_bt_s {
	/* Backtrace, stored as len program counters. */
	void		**vec;
	unsigned	len;
};

#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
	prof_bt_t	*bt;
	unsigned	max;
} prof_unwind_data_t;
#endif

struct prof_cnt_s {
	/* Profiling counters. */
	uint64_t	curobjs;
	uint64_t	curbytes;
	uint64_t	accumobjs;
	uint64_t	accumbytes;
};

typedef enum {
	prof_tctx_state_initializing,
	prof_tctx_state_nominal,
	prof_tctx_state_dumping,
	prof_tctx_state_purgatory /* Dumper must finish destroying. */
} prof_tctx_state_t;

struct prof_tctx_s {
	/* Thread data for thread that performed the allocation. */
	prof_tdata_t		*tdata;

	/*
	 * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be
	 * defunct during teardown.
	 */
	uint64_t		thr_uid;
	uint64_t		thr_discrim;

	/* Profiling counters, protected by tdata->lock. */
	prof_cnt_t		cnts;

	/* Associated global context. */
	prof_gctx_t		*gctx;

	/*
	 * UID that distinguishes multiple tctx's created by the same thread,
	 * but coexisting in gctx->tctxs.  There are two ways that such
	 * coexistence can occur:
	 * - A dumper thread can cause a tctx to be retained in the purgatory
	 *   state.
	 * - Although a single "producer" thread must create all tctx's which
	 *   share the same thr_uid, multiple "consumers" can each concurrently
	 *   execute portions of prof_tctx_destroy().  prof_tctx_destroy() only
	 *   gets called once each time cnts.cur{objs,bytes} drop to 0, but this
	 *   threshold can be hit again before the first consumer finishes
	 *   executing prof_tctx_destroy().
	 */
	uint64_t		tctx_uid;

	/* Linkage into gctx's tctxs. */
	rb_node(prof_tctx_t)	tctx_link;

	/*
	 * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents
	 * sample vs destroy race.
	 */
	bool			prepared;

	/* Current dump-related state, protected by gctx->lock. */
	prof_tctx_state_t	state;

	/*
	 * Copy of cnts snapshotted during early dump phase, protected by
	 * dump_mtx.
	 */
	prof_cnt_t		dump_cnts;
};
typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;

struct prof_gctx_s {
	/* Protects nlimbo, cnt_summed, and tctxs. */
	malloc_mutex_t		*lock;

	/*
	 * Number of threads that currently cause this gctx to be in a state of
	 * limbo due to one of:
	 *   - Initializing this gctx.
	 *   - Initializing per thread counters associated with this gctx.
	 *   - Preparing to destroy this gctx.
	 *   - Dumping a heap profile that includes this gctx.
	 * nlimbo must be 1 (single destroyer) in order to safely destroy the
	 * gctx.
	 */
	unsigned		nlimbo;

	/*
	 * Tree of profile counters, one for each thread that has allocated in
	 * this context.
	 */
	prof_tctx_tree_t	tctxs;

	/* Linkage for tree of contexts to be dumped. */
	rb_node(prof_gctx_t)	dump_link;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* Associated backtrace. */
	prof_bt_t		bt;

	/* Backtrace vector, variable size, referred to by bt. */
	void			*vec[1];
};
typedef rb_tree(prof_gctx_t) prof_gctx_tree_t;
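/*
 * Note on vec[1] above: prof_gctx_t is a variable-size structure.  The
 * allocation site presumably reserves room for the whole backtrace, along the
 * lines of
 *
 *	size_t size = offsetof(prof_gctx_t, vec) + bt->len * sizeof(void *);
 *
 * so that bt.vec can point at the trailing array (a pre-C99 substitute for a
 * flexible array member).
 */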

struct prof_tdata_s {
	malloc_mutex_t		*lock;

	/* Monotonically increasing unique thread identifier. */
	uint64_t		thr_uid;

	/*
	 * Monotonically increasing discriminator among tdata structures
	 * associated with the same thr_uid.
	 */
	uint64_t		thr_discrim;

	/* Included in heap profile dumps if non-NULL. */
	char			*thread_name;

	bool			attached;
	bool			expired;

	rb_node(prof_tdata_t)	tdata_link;

	/*
	 * Counter used to initialize prof_tctx_t's tctx_uid.  No locking is
	 * necessary when incrementing this field, because only one thread ever
	 * does so.
	 */
	uint64_t		tctx_uid_next;

	/*
	 * Hash of (prof_bt_t *)-->(prof_tctx_t *).  Each thread tracks
	 * backtraces for which it has non-zero allocation/deallocation counters
	 * associated with thread-specific prof_tctx_t objects.  Other threads
	 * may write to prof_tctx_t contents when freeing associated objects.
	 */
	ckh_t			bt2tctx;

	/* Sampling state. */
	uint64_t		prng_state;
	uint64_t		bytes_until_sample;

	/* State used to avoid dumping while operating on prof internals. */
	bool			enq;
	bool			enq_idump;
	bool			enq_gdump;

	/*
	 * Set to true during an early dump phase for tdata's which are
	 * currently being dumped.  New threads' tdata's have this initialized
	 * to false so that they aren't accidentally included in later dump
	 * phases.
	 */
	bool			dumping;

	/*
	 * True if profiling is active for this tdata's thread
	 * (thread.prof.active mallctl).
	 */
	bool			active;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* Backtrace vector, used for calls to prof_backtrace(). */
	void			*vec[PROF_BT_MAX];
};
typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

extern bool	opt_prof;
extern bool	opt_prof_active;
extern bool	opt_prof_thread_active_init;
extern size_t	opt_lg_prof_sample;   /* lg(mean bytes between samples). */
extern ssize_t	opt_lg_prof_interval; /* lg(prof_interval). */
extern bool	opt_prof_gdump;       /* High-water memory dumping. */
extern bool	opt_prof_final;       /* Final profile dumping. */
extern bool	opt_prof_leak;        /* Dump leak summary at exit. */
extern bool	opt_prof_accum;       /* Report cumulative bytes. */
extern char	opt_prof_prefix[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PATH_MAX +
#endif
    1];

/* Accessed via prof_active_[gs]et{_unlocked,}(). */
extern bool	prof_active;

/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
extern bool	prof_gdump_val;

/*
 * Profile dump interval, measured in bytes allocated.  Each arena triggers a
 * profile dump when it reaches this threshold.  The effect is that the
 * interval between profile dumps averages prof_interval, though the actual
 * interval between dumps will tend to be sporadic, and the interval will be a
 * maximum of approximately (prof_interval * narenas).
 */
extern uint64_t	prof_interval;
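/*
 * Worked example (illustrative numbers, not defaults): with
 * opt.lg_prof_interval = 30, prof_interval is 2^30 bytes, so each arena dumps
 * after allocating roughly 1 GiB.  With 4 arenas, dumps occur about once per
 * 1 GiB of total allocation on average, but in the worst case (every arena
 * just short of its threshold) roughly 4 GiB can pass between dumps.
 */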

/*
 * Initialized as opt_lg_prof_sample, and potentially modified during profiling
 * resets.
 */
extern size_t	lg_prof_sample;

void	prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
void	prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void	prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx);
void	bt_init(prof_bt_t *bt, void **vec);
void	prof_backtrace(prof_bt_t *bt);
prof_tctx_t	*prof_lookup(tsd_t *tsd, prof_bt_t *bt);
#ifdef JEMALLOC_JET
size_t	prof_tdata_count(void);
size_t	prof_bt_count(void);
const prof_cnt_t *prof_cnt_all(void);
typedef int (prof_dump_open_t)(bool, const char *);
extern prof_dump_open_t *prof_dump_open;
typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *);
extern prof_dump_header_t *prof_dump_header;
#endif
void	prof_idump(tsdn_t *tsdn);
bool	prof_mdump(tsd_t *tsd, const char *filename);
void	prof_gdump(tsdn_t *tsdn);
prof_tdata_t	*prof_tdata_init(tsd_t *tsd);
prof_tdata_t	*prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
void	prof_reset(tsd_t *tsd, size_t lg_sample);
void	prof_tdata_cleanup(tsd_t *tsd);
bool	prof_active_get(tsdn_t *tsdn);
bool	prof_active_set(tsdn_t *tsdn, bool active);
const char	*prof_thread_name_get(tsd_t *tsd);
int	prof_thread_name_set(tsd_t *tsd, const char *thread_name);
bool	prof_thread_active_get(tsd_t *tsd);
bool	prof_thread_active_set(tsd_t *tsd, bool active);
bool	prof_thread_active_init_get(tsdn_t *tsdn);
bool	prof_thread_active_init_set(tsdn_t *tsdn, bool active_init);
bool	prof_gdump_get(tsdn_t *tsdn);
bool	prof_gdump_set(tsdn_t *tsdn, bool active);
void	prof_boot0(void);
void	prof_boot1(void);
bool	prof_boot2(tsd_t *tsd);
void	prof_prefork0(tsdn_t *tsdn);
void	prof_prefork1(tsdn_t *tsdn);
void	prof_postfork_parent(tsdn_t *tsdn);
void	prof_postfork_child(tsdn_t *tsdn);
void	prof_sample_threshold_update(prof_tdata_t *tdata);

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#ifndef JEMALLOC_ENABLE_INLINE
bool	prof_active_get_unlocked(void);
bool	prof_gdump_get_unlocked(void);
prof_tdata_t	*prof_tdata_get(tsd_t *tsd, bool create);
prof_tctx_t	*prof_tctx_get(tsdn_t *tsdn, const void *ptr);
void	prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void	prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize,
    const void *old_ptr, prof_tctx_t *tctx);
bool	prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit,
    prof_tdata_t **tdata_out);
prof_tctx_t	*prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active,
    bool update);
void	prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void	prof_realloc(tsd_t *tsd, const void *ptr, size_t usize,
    prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr,
    size_t old_usize, prof_tctx_t *old_tctx);
void	prof_free(tsd_t *tsd, const void *ptr, size_t usize);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
JEMALLOC_ALWAYS_INLINE bool
prof_active_get_unlocked(void)
{

	/*
	 * Even if opt_prof is true, sampling can be temporarily disabled by
	 * setting prof_active to false.  No locking is used when reading
	 * prof_active in the fast path, so there are no guarantees regarding
	 * how long it will take for all threads to notice state changes.
	 */
	return (prof_active);
}

JEMALLOC_ALWAYS_INLINE bool
prof_gdump_get_unlocked(void)
{

	/*
	 * No locking is used when reading prof_gdump_val in the fast path, so
	 * there are no guarantees regarding how long it will take for all
	 * threads to notice state changes.
	 */
	return (prof_gdump_val);
}

JEMALLOC_ALWAYS_INLINE prof_tdata_t *
prof_tdata_get(tsd_t *tsd, bool create)
{
	prof_tdata_t *tdata;

	cassert(config_prof);

	tdata = tsd_prof_tdata_get(tsd);
	if (create) {
		if (unlikely(tdata == NULL)) {
			if (tsd_nominal(tsd)) {
				tdata = prof_tdata_init(tsd);
				tsd_prof_tdata_set(tsd, tdata);
			}
		} else if (unlikely(tdata->expired)) {
			tdata = prof_tdata_reinit(tsd, tdata);
			tsd_prof_tdata_set(tsd, tdata);
		}
		assert(tdata == NULL || tdata->attached);
	}

	return (tdata);
}

JEMALLOC_ALWAYS_INLINE prof_tctx_t *
prof_tctx_get(tsdn_t *tsdn, const void *ptr)
{

	cassert(config_prof);
	assert(ptr != NULL);

	return (arena_prof_tctx_get(tsdn, ptr));
}

JEMALLOC_ALWAYS_INLINE void
prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx)
{

	cassert(config_prof);
	assert(ptr != NULL);

	arena_prof_tctx_set(tsdn, ptr, usize, tctx);
}

JEMALLOC_ALWAYS_INLINE void
prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, const void *old_ptr,
    prof_tctx_t *old_tctx)
{

	cassert(config_prof);
	assert(ptr != NULL);

	arena_prof_tctx_reset(tsdn, ptr, usize, old_ptr, old_tctx);
}

JEMALLOC_ALWAYS_INLINE bool
prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
    prof_tdata_t **tdata_out)
{
	prof_tdata_t *tdata;

	cassert(config_prof);

	tdata = prof_tdata_get(tsd, true);
	if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX))
		tdata = NULL;

	if (tdata_out != NULL)
		*tdata_out = tdata;

	if (unlikely(tdata == NULL))
		return (true);

	if (likely(tdata->bytes_until_sample >= usize)) {
		if (update)
			tdata->bytes_until_sample -= usize;
		return (true);
	} else {
		/* Compute new sample threshold. */
		if (update)
			prof_sample_threshold_update(tdata);
		return (!tdata->active);
	}
}
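/*
 * Return-value convention for prof_sample_accum_update(), as relied upon by
 * prof_alloc_prep() and prof_realloc() below: true means "do not sample this
 * allocation" (no usable tdata, threshold not yet reached, or sampling
 * inactive for the thread); false means a sample should be taken.
 */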

JEMALLOC_ALWAYS_INLINE prof_tctx_t *
prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update)
{
	prof_tctx_t *ret;
	prof_tdata_t *tdata;
	prof_bt_t bt;

	assert(usize == s2u(usize));

	if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update,
	    &tdata)))
		ret = (prof_tctx_t *)(uintptr_t)1U;
	else {
		bt_init(&bt, tdata->vec);
		prof_backtrace(&bt);
		ret = prof_lookup(tsd, &bt);
	}

	return (ret);
}
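/*
 * The (prof_tctx_t *)(uintptr_t)1U returned above is a sentinel meaning "not
 * sampled"; the (uintptr_t)tctx > (uintptr_t)1U checks in prof_malloc(),
 * prof_realloc(), and prof_free() below use it to distinguish real, sampled
 * tctx's from unsampled allocations.
 */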

JEMALLOC_ALWAYS_INLINE void
prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx)
{

	cassert(config_prof);
	assert(ptr != NULL);
	assert(usize == isalloc(tsdn, ptr, true));

	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
		prof_malloc_sample_object(tsdn, ptr, usize, tctx);
	else
		prof_tctx_set(tsdn, ptr, usize, (prof_tctx_t *)(uintptr_t)1U);
}

JEMALLOC_ALWAYS_INLINE void
prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
    bool prof_active, bool updated, const void *old_ptr, size_t old_usize,
    prof_tctx_t *old_tctx)
{
	bool sampled, old_sampled;

	cassert(config_prof);
	assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);

	if (prof_active && !updated && ptr != NULL) {
		assert(usize == isalloc(tsd_tsdn(tsd), ptr, true));
		if (prof_sample_accum_update(tsd, usize, true, NULL)) {
			/*
			 * Don't sample.  The usize passed to prof_alloc_prep()
			 * was larger than what actually got allocated, so a
			 * backtrace was captured for this allocation, even
			 * though its actual usize was insufficient to cross the
			 * sample threshold.
			 */
			prof_alloc_rollback(tsd, tctx, true);
			tctx = (prof_tctx_t *)(uintptr_t)1U;
		}
	}

	sampled = ((uintptr_t)tctx > (uintptr_t)1U);
	old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U);

	if (unlikely(sampled))
		prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx);
	else
		prof_tctx_reset(tsd_tsdn(tsd), ptr, usize, old_ptr, old_tctx);

	if (unlikely(old_sampled))
		prof_free_sampled_object(tsd, old_usize, old_tctx);
}

JEMALLOC_ALWAYS_INLINE void
prof_free(tsd_t *tsd, const void *ptr, size_t usize)
{
	prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr);

	cassert(config_prof);
	assert(usize == isalloc(tsd_tsdn(tsd), ptr, true));

	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
		prof_free_sampled_object(tsd, usize, tctx);
}
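/*
 * Note that prof_free() only updates counters for sampled allocations: an
 * unsampled object carries the (prof_tctx_t *)1U sentinel, so the check above
 * skips prof_free_sampled_object() entirely on the common, unsampled path.
 */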
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/