#define JEMALLOC_BACKGROUND_THREAD_C_
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"

/******************************************************************************/
/* Data. */

/* This option should be opt-in only. */
#define BACKGROUND_THREAD_DEFAULT false
/* Read-only after initialization. */
bool opt_background_thread = BACKGROUND_THREAD_DEFAULT;
size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT;

/* Used for thread creation, termination and stats. */
malloc_mutex_t background_thread_lock;
/* Indicates global state.  Atomic because decay reads it without locking. */
atomic_b_t background_thread_enabled_state;
size_t n_background_threads;
size_t max_background_threads;
/* Thread info per-index. */
background_thread_info_t *background_thread_info;

/* False when the necessary runtime support is missing. */
bool can_enable_background_thread;

/******************************************************************************/

#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
#include <dlfcn.h>

static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
    void *(*)(void *), void *__restrict);

static void
pthread_create_wrapper_init(void) {
#ifdef JEMALLOC_LAZY_LOCK
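	/*
	 * A thread is about to be created, so under lazy locking make sure
	 * mutexes actually operate from here on by marking the process as
	 * threaded.
	 */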
	if (!isthreaded) {
		isthreaded = true;
	}
#endif
}

int
pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr,
    void *(*start_routine)(void *), void *__restrict arg) {
	pthread_create_wrapper_init();

	return pthread_create_fptr(thread, attr, start_routine, arg);
}
#endif /* JEMALLOC_PTHREAD_CREATE_WRAPPER */

#ifndef JEMALLOC_BACKGROUND_THREAD
#define NOT_REACHED { not_reached(); }
bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED
bool background_threads_enable(tsd_t *tsd) NOT_REACHED
bool background_threads_disable(tsd_t *tsd) NOT_REACHED
void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
    arena_decay_t *decay, size_t npages_new) NOT_REACHED
void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED
void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED
void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED
void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED
bool background_thread_stats_read(tsdn_t *tsdn,
    background_thread_stats_t *stats) NOT_REACHED
void background_thread_ctl_init(tsdn_t *tsdn) NOT_REACHED
#undef NOT_REACHED
#else

static bool background_thread_enabled_at_fork;

static void
background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) {
	background_thread_wakeup_time_set(tsdn, info, 0);
	info->npages_to_purge_new = 0;
	if (config_stats) {
		info->tot_n_runs = 0;
		nstime_init(&info->tot_sleep_time, 0);
	}
}

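/*
 * Pin the calling thread to the given CPU where the platform supports it
 * (sched_setaffinity); returns true on failure.  On platforms without that
 * support this is a no-op that reports success.
 */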
static inline bool
set_current_thread_affinity(UNUSED int cpu) {
#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
	cpu_set_t cpuset;
	CPU_ZERO(&cpuset);
	CPU_SET(cpu, &cpuset);
	int ret = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);

	return (ret != 0);
#else
	return false;
#endif
}

/* Threshold for determining when to wake up the background thread. */
#define BACKGROUND_THREAD_NPAGES_THRESHOLD UINT64_C(1024)
#define BILLION UINT64_C(1000000000)
/* Minimal sleep interval is 100 ms. */
#define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10)

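/*
 * Estimate how many pages would be purged if the thread slept through
 * `interval` more decay epochs: each backlog entry slides `interval` steps
 * down the smoothstep curve, so it contributes the h_steps delta between its
 * current position and its future one (entries that age out contribute their
 * full remaining share).  Fixed point, scaled by 2^SMOOTHSTEP_BFP.
 */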
static inline size_t
decay_npurge_after_interval(arena_decay_t *decay, size_t interval) {
	size_t i;
	uint64_t sum = 0;
	for (i = 0; i < interval; i++) {
		sum += decay->backlog[i] * h_steps[i];
	}
	for (; i < SMOOTHSTEP_NSTEPS; i++) {
		sum += decay->backlog[i] * (h_steps[i] - h_steps[i - interval]);
	}

	return (size_t)(sum >> SMOOTHSTEP_BFP);
}

static uint64_t
arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay,
    extents_t *extents) {
	if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
		/* Use minimal interval if decay is contended. */
		return BACKGROUND_THREAD_MIN_INTERVAL_NS;
	}

	uint64_t interval;
	ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
	if (decay_time <= 0) {
		/* Purging is currently eager or disabled. */
		interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
		goto label_done;
	}

	uint64_t decay_interval_ns = nstime_ns(&decay->interval);
	assert(decay_interval_ns > 0);
	size_t npages = extents_npages_get(extents);
	if (npages == 0) {
		unsigned i;
		for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
			if (decay->backlog[i] > 0) {
				break;
			}
		}
		if (i == SMOOTHSTEP_NSTEPS) {
			/* No dirty pages recorded.  Sleep indefinitely. */
			interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
			goto label_done;
		}
	}
	if (npages <= BACKGROUND_THREAD_NPAGES_THRESHOLD) {
		/* Use max interval. */
		interval = decay_interval_ns * SMOOTHSTEP_NSTEPS;
		goto label_done;
	}

	size_t lb = BACKGROUND_THREAD_MIN_INTERVAL_NS / decay_interval_ns;
	size_t ub = SMOOTHSTEP_NSTEPS;
	/* Require at least 2 intervals to ensure reaching the next epoch deadline. */
	lb = (lb < 2) ? 2 : lb;
	if ((decay_interval_ns * ub <= BACKGROUND_THREAD_MIN_INTERVAL_NS) ||
	    (lb + 2 > ub)) {
		interval = BACKGROUND_THREAD_MIN_INTERVAL_NS;
		goto label_done;
	}

	assert(lb + 2 <= ub);
	size_t npurge_lb, npurge_ub;
	npurge_lb = decay_npurge_after_interval(decay, lb);
	if (npurge_lb > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
		interval = decay_interval_ns * lb;
		goto label_done;
	}
	npurge_ub = decay_npurge_after_interval(decay, ub);
	if (npurge_ub < BACKGROUND_THREAD_NPAGES_THRESHOLD) {
		interval = decay_interval_ns * ub;
		goto label_done;
	}

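	/*
	 * Binary search for the shortest interval that purges at least
	 * BACKGROUND_THREAD_NPAGES_THRESHOLD pages.  The bounds halve each
	 * round, so the search finishes within lg(SMOOTHSTEP_NSTEPS) + 1
	 * iterations, as the assert below checks.
	 */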
	unsigned n_search = 0;
	size_t target, npurge;
	while ((npurge_lb + BACKGROUND_THREAD_NPAGES_THRESHOLD < npurge_ub)
	    && (lb + 2 < ub)) {
		target = (lb + ub) / 2;
		npurge = decay_npurge_after_interval(decay, target);
		if (npurge > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
			ub = target;
			npurge_ub = npurge;
		} else {
			lb = target;
			npurge_lb = npurge;
		}
		assert(n_search++ < lg_floor(SMOOTHSTEP_NSTEPS) + 1);
	}
	interval = decay_interval_ns * (ub + lb) / 2;
label_done:
	interval = (interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) ?
	    BACKGROUND_THREAD_MIN_INTERVAL_NS : interval;
	malloc_mutex_unlock(tsdn, &decay->mtx);

	return interval;
}

/* Compute purge interval for background threads. */
static uint64_t
arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) {
	uint64_t i1, i2;
	i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty,
	    &arena->extents_dirty);
	if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
		return i1;
	}
	i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy,
	    &arena->extents_muzzy);

	return i1 < i2 ? i1 : i2;
}

static void
background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info,
    uint64_t interval) {
	if (config_stats) {
		info->tot_n_runs++;
	}
	info->npages_to_purge_new = 0;

	struct timeval tv;
	/* The timedwait below requires the realtime clock. */
	gettimeofday(&tv, NULL);
	nstime_t before_sleep;
	nstime_init2(&before_sleep, tv.tv_sec, tv.tv_usec * 1000);

	int ret;
	if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) {
		assert(background_thread_indefinite_sleep(info));
		ret = pthread_cond_wait(&info->cond, &info->mtx.lock);
		assert(ret == 0);
	} else {
		assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS &&
		    interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP);
		/*
		 * Record the wakeup time using the malloc clock (which can
		 * differ from the realtime clock in tv).
		 */
		nstime_t next_wakeup;
		nstime_init(&next_wakeup, 0);
		nstime_update(&next_wakeup);
		nstime_iadd(&next_wakeup, interval);
		assert(nstime_ns(&next_wakeup) <
		    BACKGROUND_THREAD_INDEFINITE_SLEEP);
		background_thread_wakeup_time_set(tsdn, info,
		    nstime_ns(&next_wakeup));

		nstime_t ts_wakeup;
		nstime_copy(&ts_wakeup, &before_sleep);
		nstime_iadd(&ts_wakeup, interval);
		struct timespec ts;
		ts.tv_sec = (size_t)nstime_sec(&ts_wakeup);
		ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup);

		assert(!background_thread_indefinite_sleep(info));
		ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, &ts);
		assert(ret == ETIMEDOUT || ret == 0);
		background_thread_wakeup_time_set(tsdn, info,
		    BACKGROUND_THREAD_INDEFINITE_SLEEP);
	}
	if (config_stats) {
		gettimeofday(&tv, NULL);
		nstime_t after_sleep;
		nstime_init2(&after_sleep, tv.tv_sec, tv.tv_usec * 1000);
		if (nstime_compare(&after_sleep, &before_sleep) > 0) {
			nstime_subtract(&after_sleep, &before_sleep);
			nstime_add(&info->tot_sleep_time, &after_sleep);
		}
	}
}

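/*
 * Returns true if the thread is paused (e.g. during arena reset).  Blocking
 * on the global lock acts as a barrier: whoever paused us holds that lock
 * while updating thread states, so once we acquire and release it the state
 * transition has completed.
 */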
static bool
background_thread_pause_check(tsdn_t *tsdn, background_thread_info_t *info) {
	if (unlikely(info->state == background_thread_paused)) {
		malloc_mutex_unlock(tsdn, &info->mtx);
		/* Wait on global lock to update status. */
		malloc_mutex_lock(tsdn, &background_thread_lock);
		malloc_mutex_unlock(tsdn, &background_thread_lock);
		malloc_mutex_lock(tsdn, &info->mtx);
		return true;
	}

	return false;
}

static inline void
background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info,
    unsigned ind) {
	uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
	unsigned narenas = narenas_total_get();

	for (unsigned i = ind; i < narenas; i += max_background_threads) {
		arena_t *arena = arena_get(tsdn, i, false);
		if (!arena) {
			continue;
		}
		arena_decay(tsdn, arena, true, false);
		if (min_interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
			/* Min interval will be used. */
			continue;
		}
		uint64_t interval = arena_decay_compute_purge_interval(tsdn,
		    arena);
		assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS);
		if (min_interval > interval) {
			min_interval = interval;
		}
	}
	background_thread_sleep(tsdn, info, min_interval);
}

static bool
background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) {
	if (info == &background_thread_info[0]) {
		malloc_mutex_assert_owner(tsd_tsdn(tsd),
		    &background_thread_lock);
	} else {
		malloc_mutex_assert_not_owner(tsd_tsdn(tsd),
		    &background_thread_lock);
	}

	pre_reentrancy(tsd, NULL);
	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
	bool has_thread;
	assert(info->state != background_thread_paused);
	if (info->state == background_thread_started) {
		has_thread = true;
		info->state = background_thread_stopped;
		pthread_cond_signal(&info->cond);
	} else {
		has_thread = false;
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);

	if (!has_thread) {
		post_reentrancy(tsd);
		return false;
	}
	void *ret;
	if (pthread_join(info->thread, &ret)) {
		post_reentrancy(tsd);
		return true;
	}
	assert(ret == NULL);
	n_background_threads--;
	post_reentrancy(tsd);

	return false;
}

static void *background_thread_entry(void *ind_arg);

static int
background_thread_create_signals_masked(pthread_t *thread,
    const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg) {
	/*
	 * Mask signals during thread creation so that the created thread
	 * inherits a mask with all signals blocked.
	 */
	sigset_t set;
	sigfillset(&set);
	sigset_t oldset;
	int mask_err = pthread_sigmask(SIG_SETMASK, &set, &oldset);
	if (mask_err != 0) {
		return mask_err;
	}
	int create_err = pthread_create_wrapper(thread, attr, start_routine,
	    arg);
	/*
	 * Restore the signal mask.  Failure to restore the signal mask here
	 * changes program behavior.
	 */
	int restore_err = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
	if (restore_err != 0) {
		malloc_printf("<jemalloc>: background thread creation "
		    "failed (%d), and signal mask restoration failed "
		    "(%d)\n", create_err, restore_err);
		if (opt_abort) {
			abort();
		}
	}
	return create_err;
}

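/*
 * Called by thread 0 (with background_thread_info[0].mtx held) to launch any
 * sibling threads whose slots were marked started but whose pthreads have not
 * been created yet.  Creates at most one thread per call; returns true after
 * a creation attempt so that the caller, whose mutex was released meanwhile,
 * restarts its loop.
 */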
static bool
check_background_thread_creation(tsd_t *tsd, unsigned *n_created,
    bool *created_threads) {
	bool ret = false;
	if (likely(*n_created == n_background_threads)) {
		return ret;
	}

	tsdn_t *tsdn = tsd_tsdn(tsd);
	malloc_mutex_unlock(tsdn, &background_thread_info[0].mtx);
	for (unsigned i = 1; i < max_background_threads; i++) {
		if (created_threads[i]) {
			continue;
		}
		background_thread_info_t *info = &background_thread_info[i];
		malloc_mutex_lock(tsdn, &info->mtx);
		/*
		 * If the thread is in the background_thread_paused state
		 * (e.g. because of arena reset), delay its creation.
		 */
		bool create = (info->state == background_thread_started);
		malloc_mutex_unlock(tsdn, &info->mtx);
		if (!create) {
			continue;
		}

		pre_reentrancy(tsd, NULL);
		int err = background_thread_create_signals_masked(&info->thread,
		    NULL, background_thread_entry, (void *)(uintptr_t)i);
		post_reentrancy(tsd);

		if (err == 0) {
			(*n_created)++;
			created_threads[i] = true;
		} else {
			malloc_printf("<jemalloc>: background thread "
			    "creation failed (%d)\n", err);
			if (opt_abort) {
				abort();
			}
		}
		/* Return to restart the loop since we unlocked. */
		ret = true;
		break;
	}
	malloc_mutex_lock(tsdn, &background_thread_info[0].mtx);

	return ret;
}

static void
background_thread0_work(tsd_t *tsd) {
	/* Thread0 is also responsible for launching / terminating threads. */
	VARIABLE_ARRAY(bool, created_threads, max_background_threads);
	unsigned i;
	for (i = 1; i < max_background_threads; i++) {
		created_threads[i] = false;
	}
	/* Start working, and create more threads when asked. */
	unsigned n_created = 1;
	while (background_thread_info[0].state != background_thread_stopped) {
		if (background_thread_pause_check(tsd_tsdn(tsd),
		    &background_thread_info[0])) {
			continue;
		}
		if (check_background_thread_creation(tsd, &n_created,
		    (bool *)&created_threads)) {
			continue;
		}
		background_work_sleep_once(tsd_tsdn(tsd),
		    &background_thread_info[0], 0);
	}

	/*
	 * Shut down other threads at exit.  Note that the ctl thread is
	 * holding the global background_thread mutex and waiting for us.
	 */
	assert(!background_thread_enabled());
	for (i = 1; i < max_background_threads; i++) {
		background_thread_info_t *info = &background_thread_info[i];
		assert(info->state != background_thread_paused);
		if (created_threads[i]) {
			background_threads_disable_single(tsd, info);
		} else {
			malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
			if (info->state != background_thread_stopped) {
				/* The thread was not created. */
				assert(info->state ==
				    background_thread_started);
				n_background_threads--;
				info->state = background_thread_stopped;
			}
			malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
		}
	}
	background_thread_info[0].state = background_thread_stopped;
	assert(n_background_threads == 1);
}

static void
background_work(tsd_t *tsd, unsigned ind) {
	background_thread_info_t *info = &background_thread_info[ind];

	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
	background_thread_wakeup_time_set(tsd_tsdn(tsd), info,
	    BACKGROUND_THREAD_INDEFINITE_SLEEP);
	if (ind == 0) {
		background_thread0_work(tsd);
	} else {
		while (info->state != background_thread_stopped) {
			if (background_thread_pause_check(tsd_tsdn(tsd),
			    info)) {
				continue;
			}
			background_work_sleep_once(tsd_tsdn(tsd), info, ind);
		}
	}
	assert(info->state == background_thread_stopped);
	background_thread_wakeup_time_set(tsd_tsdn(tsd), info, 0);
	malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
}

static void *
background_thread_entry(void *ind_arg) {
	unsigned thread_ind = (unsigned)(uintptr_t)ind_arg;
	assert(thread_ind < max_background_threads);
#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP
	pthread_setname_np(pthread_self(), "jemalloc_bg_thd");
#endif
	if (opt_percpu_arena != percpu_arena_disabled) {
		set_current_thread_affinity((int)thread_ind);
	}
	/*
	 * Start periodic background work.  We use internal tsd, which avoids
	 * side effects such as triggering new arena creation (which in turn
	 * would trigger another background thread creation).
	 */
	background_work(tsd_internal_fetch(), thread_ind);
	assert(pthread_equal(pthread_self(),
	    background_thread_info[thread_ind].thread));

	return NULL;
}

static void
background_thread_init(tsd_t *tsd, background_thread_info_t *info) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
	info->state = background_thread_started;
	background_thread_info_init(tsd_tsdn(tsd), info);
	n_background_threads++;
}

/* Create a new background thread if needed. */
bool
background_thread_create(tsd_t *tsd, unsigned arena_ind) {
	assert(have_background_thread);
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);

	/* We create at most max_background_threads threads. */
	size_t thread_ind = arena_ind % max_background_threads;
	background_thread_info_t *info = &background_thread_info[thread_ind];

	bool need_new_thread;
	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
	need_new_thread = background_thread_enabled() &&
	    (info->state == background_thread_stopped);
	if (need_new_thread) {
		background_thread_init(tsd, info);
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
	if (!need_new_thread) {
		return false;
	}
	if (arena_ind != 0) {
		/* Threads are created asynchronously by Thread 0. */
		background_thread_info_t *t0 = &background_thread_info[0];
		malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx);
		assert(t0->state == background_thread_started);
		pthread_cond_signal(&t0->cond);
		malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx);

		return false;
	}

	pre_reentrancy(tsd, NULL);
	/*
	 * To avoid complications (besides reentrancy), create internal
	 * background threads with the underlying pthread_create.
	 */
	int err = background_thread_create_signals_masked(&info->thread, NULL,
	    background_thread_entry, (void *)thread_ind);
	post_reentrancy(tsd);

	if (err != 0) {
		malloc_printf("<jemalloc>: arena 0 background thread creation "
		    "failed (%d)\n", err);
		malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
		info->state = background_thread_stopped;
		n_background_threads--;
		malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);

		return true;
	}

	return false;
}

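/*
 * Enable / disable below are reached through the "background_thread" mallctl
 * with background_thread_lock held by the ctl code; e.g. an application
 * toggles the feature with something like:
 *
 *	bool enable = true;
 *	mallctl("background_thread", NULL, NULL, &enable, sizeof(enable));
 */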
bool
background_threads_enable(tsd_t *tsd) {
	assert(n_background_threads == 0);
	assert(background_thread_enabled());
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);

	VARIABLE_ARRAY(bool, marked, max_background_threads);
	unsigned i, nmarked;
	for (i = 0; i < max_background_threads; i++) {
		marked[i] = false;
	}
	nmarked = 0;
	/* Thread 0 is required and created at the end. */
	marked[0] = true;
	/* Mark the slots for threads that thread 0 will create. */
	unsigned n = narenas_total_get();
	for (i = 1; i < n; i++) {
		if (marked[i % max_background_threads] ||
		    arena_get(tsd_tsdn(tsd), i, false) == NULL) {
			continue;
		}
		background_thread_info_t *info = &background_thread_info[
		    i % max_background_threads];
		malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
		assert(info->state == background_thread_stopped);
		background_thread_init(tsd, info);
		malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
		marked[i % max_background_threads] = true;
		if (++nmarked == max_background_threads) {
			break;
		}
	}

	return background_thread_create(tsd, 0);
}

bool
background_threads_disable(tsd_t *tsd) {
	assert(!background_thread_enabled());
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);

	/* Thread 0 will be responsible for terminating other threads. */
	if (background_threads_disable_single(tsd,
	    &background_thread_info[0])) {
		return true;
	}
	assert(n_background_threads == 0);

	return false;
}

/* Check if we need to signal the background thread early. */
void
background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
    arena_decay_t *decay, size_t npages_new) {
	background_thread_info_t *info = arena_background_thread_info_get(
	    arena);
	if (malloc_mutex_trylock(tsdn, &info->mtx)) {
		/*
		 * The background thread may hold this mutex for a long period
		 * of time, and we'd like to avoid inflicting that variance on
		 * application threads.  Keep this non-blocking, and leave the
		 * work to a future epoch.
		 */
		return;
	}

	if (info->state != background_thread_started) {
		goto label_done;
	}
	if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
		goto label_done;
	}

	ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
	if (decay_time <= 0) {
		/* Purging is currently eager or disabled. */
		goto label_done_unlock2;
	}
	uint64_t decay_interval_ns = nstime_ns(&decay->interval);
	assert(decay_interval_ns > 0);

	nstime_t diff;
	nstime_init(&diff, background_thread_wakeup_time_get(info));
	if (nstime_compare(&diff, &decay->epoch) <= 0) {
		goto label_done_unlock2;
	}
	nstime_subtract(&diff, &decay->epoch);
	if (nstime_ns(&diff) < BACKGROUND_THREAD_MIN_INTERVAL_NS) {
		goto label_done_unlock2;
	}

	if (npages_new > 0) {
		size_t n_epoch = (size_t)(nstime_ns(&diff) / decay_interval_ns);
		/*
		 * Compute how many new pages we would need to purge by the next
		 * wakeup, which is used to determine if we should signal the
		 * background thread.
		 */
		uint64_t npurge_new;
		if (n_epoch >= SMOOTHSTEP_NSTEPS) {
			npurge_new = npages_new;
		} else {
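			/*
			 * Only part of the epoch range elapses by the next
			 * wakeup, so the purged fraction of the new pages is
			 * the smoothstep delta over n_epoch steps, in fixed
			 * point (scaled by 2^SMOOTHSTEP_BFP).
			 */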
			uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1];
			assert(h_steps_max >=
			    h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
			npurge_new = npages_new * (h_steps_max -
			    h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
			npurge_new >>= SMOOTHSTEP_BFP;
		}
		info->npages_to_purge_new += npurge_new;
	}

	bool should_signal;
	if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
		should_signal = true;
	} else if (unlikely(background_thread_indefinite_sleep(info)) &&
	    (extents_npages_get(&arena->extents_dirty) > 0 ||
	    extents_npages_get(&arena->extents_muzzy) > 0 ||
	    info->npages_to_purge_new > 0)) {
		should_signal = true;
	} else {
		should_signal = false;
	}

	if (should_signal) {
		info->npages_to_purge_new = 0;
		pthread_cond_signal(&info->cond);
	}
label_done_unlock2:
	malloc_mutex_unlock(tsdn, &decay->mtx);
label_done:
	malloc_mutex_unlock(tsdn, &info->mtx);
}

void
background_thread_prefork0(tsdn_t *tsdn) {
	malloc_mutex_prefork(tsdn, &background_thread_lock);
	background_thread_enabled_at_fork = background_thread_enabled();
}

void
background_thread_prefork1(tsdn_t *tsdn) {
	for (unsigned i = 0; i < max_background_threads; i++) {
		malloc_mutex_prefork(tsdn, &background_thread_info[i].mtx);
	}
}

void
background_thread_postfork_parent(tsdn_t *tsdn) {
	for (unsigned i = 0; i < max_background_threads; i++) {
		malloc_mutex_postfork_parent(tsdn,
		    &background_thread_info[i].mtx);
	}
	malloc_mutex_postfork_parent(tsdn, &background_thread_lock);
}

void
background_thread_postfork_child(tsdn_t *tsdn) {
	for (unsigned i = 0; i < max_background_threads; i++) {
		malloc_mutex_postfork_child(tsdn,
		    &background_thread_info[i].mtx);
	}
	malloc_mutex_postfork_child(tsdn, &background_thread_lock);
	if (!background_thread_enabled_at_fork) {
		return;
	}

	/* Clear background_thread state (reset to disabled for child). */
	malloc_mutex_lock(tsdn, &background_thread_lock);
	n_background_threads = 0;
	background_thread_enabled_set(tsdn, false);
	for (unsigned i = 0; i < max_background_threads; i++) {
		background_thread_info_t *info = &background_thread_info[i];
		malloc_mutex_lock(tsdn, &info->mtx);
		info->state = background_thread_stopped;
		int ret = pthread_cond_init(&info->cond, NULL);
		assert(ret == 0);
		background_thread_info_init(tsdn, info);
		malloc_mutex_unlock(tsdn, &info->mtx);
	}
	malloc_mutex_unlock(tsdn, &background_thread_lock);
}

bool
background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) {
	assert(config_stats);
	malloc_mutex_lock(tsdn, &background_thread_lock);
	if (!background_thread_enabled()) {
		malloc_mutex_unlock(tsdn, &background_thread_lock);
		return true;
	}

	stats->num_threads = n_background_threads;
	uint64_t num_runs = 0;
	nstime_init(&stats->run_interval, 0);
	for (unsigned i = 0; i < max_background_threads; i++) {
		background_thread_info_t *info = &background_thread_info[i];
		malloc_mutex_lock(tsdn, &info->mtx);
		if (info->state != background_thread_stopped) {
			num_runs += info->tot_n_runs;
			nstime_add(&stats->run_interval, &info->tot_sleep_time);
		}
		malloc_mutex_unlock(tsdn, &info->mtx);
	}
	stats->num_runs = num_runs;
	if (num_runs > 0) {
		nstime_idivide(&stats->run_interval, num_runs);
	}
	malloc_mutex_unlock(tsdn, &background_thread_lock);

	return false;
}

#undef BACKGROUND_THREAD_NPAGES_THRESHOLD
#undef BILLION
#undef BACKGROUND_THREAD_MIN_INTERVAL_NS

static bool
pthread_create_fptr_init(void) {
	if (pthread_create_fptr != NULL) {
		return false;
	}
	pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
	if (pthread_create_fptr == NULL) {
		can_enable_background_thread = false;
		if (config_lazy_lock || opt_background_thread) {
			malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
			    "\"pthread_create\")\n");
			abort();
		}
	} else {
		can_enable_background_thread = true;
	}

	return false;
}

/*
 * When lazy lock is enabled, we need to make sure isthreaded is set before
 * taking any background_thread locks.  This is called early in ctl (rather
 * than waiting for a pthread_create call to trigger it) because the lock is
 * required before creating background threads.
 */
void
background_thread_ctl_init(tsdn_t *tsdn) {
	malloc_mutex_assert_not_owner(tsdn, &background_thread_lock);
#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
	pthread_create_fptr_init();
	pthread_create_wrapper_init();
#endif
}

#endif /* defined(JEMALLOC_BACKGROUND_THREAD) */

bool
background_thread_boot0(void) {
	if (!have_background_thread && opt_background_thread) {
		malloc_printf("<jemalloc>: option background_thread currently "
		    "supports pthread only\n");
		return true;
	}
#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
	if ((config_lazy_lock || opt_background_thread) &&
	    pthread_create_fptr_init()) {
		return true;
	}
#endif
	return false;
}

bool
background_thread_boot1(tsdn_t *tsdn) {
#ifdef JEMALLOC_BACKGROUND_THREAD
	assert(have_background_thread);
	assert(narenas_total_get() > 0);

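	/*
	 * If the user left opt.max_background_threads at its default, cap the
	 * thread count at the number of CPUs.
	 */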
	if (opt_max_background_threads == MAX_BACKGROUND_THREAD_LIMIT &&
	    ncpus < MAX_BACKGROUND_THREAD_LIMIT) {
		opt_max_background_threads = ncpus;
	}
	max_background_threads = opt_max_background_threads;

	background_thread_enabled_set(tsdn, opt_background_thread);
	if (malloc_mutex_init(&background_thread_lock,
	    "background_thread_global",
	    WITNESS_RANK_BACKGROUND_THREAD_GLOBAL,
	    malloc_mutex_rank_exclusive)) {
		return true;
	}

	background_thread_info = (background_thread_info_t *)base_alloc(tsdn,
	    b0get(), opt_max_background_threads *
	    sizeof(background_thread_info_t), CACHELINE);
	if (background_thread_info == NULL) {
		return true;
	}

	for (unsigned i = 0; i < max_background_threads; i++) {
		background_thread_info_t *info = &background_thread_info[i];
		/* Thread mutex is rank_inclusive because of thread0. */
		if (malloc_mutex_init(&info->mtx, "background_thread",
		    WITNESS_RANK_BACKGROUND_THREAD,
		    malloc_mutex_address_ordered)) {
			return true;
		}
		if (pthread_cond_init(&info->cond, NULL)) {
			return true;
		}
		malloc_mutex_lock(tsdn, &info->mtx);
		info->state = background_thread_stopped;
		background_thread_info_init(tsdn, info);
		malloc_mutex_unlock(tsdn, &info->mtx);
	}
#endif

	return false;
}