1 /* CPU control.
2 * (C) 2001, 2002, 2003, 2004 Rusty Russell
3 *
4 * This code is licenced under the GPL.
5 */
6 #include <linux/sched/mm.h>
7 #include <linux/proc_fs.h>
8 #include <linux/smp.h>
9 #include <linux/init.h>
10 #include <linux/notifier.h>
11 #include <linux/sched/signal.h>
12 #include <linux/sched/hotplug.h>
13 #include <linux/sched/isolation.h>
14 #include <linux/sched/task.h>
15 #include <linux/sched/smt.h>
16 #include <linux/unistd.h>
17 #include <linux/cpu.h>
18 #include <linux/oom.h>
19 #include <linux/rcupdate.h>
20 #include <linux/export.h>
21 #include <linux/bug.h>
22 #include <linux/kthread.h>
23 #include <linux/stop_machine.h>
24 #include <linux/mutex.h>
25 #include <linux/gfp.h>
26 #include <linux/suspend.h>
27 #include <linux/lockdep.h>
28 #include <linux/tick.h>
29 #include <linux/irq.h>
30 #include <linux/nmi.h>
31 #include <linux/smpboot.h>
32 #include <linux/relay.h>
33 #include <linux/slab.h>
34 #include <linux/percpu-rwsem.h>
35 #include <linux/cpuset.h>
36 #include <linux/random.h>
37
38 #include <trace/events/power.h>
39 #define CREATE_TRACE_POINTS
40 #include <trace/events/cpuhp.h>
41
42 #include "smpboot.h"
43
44 /**
45 * cpuhp_cpu_state - Per cpu hotplug state storage
46 * @state: The current cpu state
47 * @target: The target state
48 * @thread: Pointer to the hotplug thread
49 * @should_run: Thread should execute
50 * @rollback: Perform a rollback
51 * @single: Single callback invocation
52 * @bringup: Single callback bringup or teardown selector
53 * @cb_state: The state for a single callback (install/uninstall)
54 * @result: Result of the operation
55 * @done_up: Signal completion to the issuer of the task for cpu-up
56 * @done_down: Signal completion to the issuer of the task for cpu-down
57 */
58 struct cpuhp_cpu_state {
59 enum cpuhp_state state;
60 enum cpuhp_state target;
61 enum cpuhp_state fail;
62 #ifdef CONFIG_SMP
63 struct task_struct *thread;
64 bool should_run;
65 bool rollback;
66 bool single;
67 bool bringup;
68 struct hlist_node *node;
69 struct hlist_node *last;
70 enum cpuhp_state cb_state;
71 int result;
72 struct completion done_up;
73 struct completion done_down;
74 #endif
75 };
76
77 static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
78 .fail = CPUHP_INVALID,
79 };
80
81 #ifdef CONFIG_SMP
82 cpumask_t cpus_booted_once_mask;
83 #endif
84
85 #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
86 static struct lockdep_map cpuhp_state_up_map =
87 STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
88 static struct lockdep_map cpuhp_state_down_map =
89 STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
90
91
92 static inline void cpuhp_lock_acquire(bool bringup)
93 {
94 lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
95 }
96
97 static inline void cpuhp_lock_release(bool bringup)
98 {
99 lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
100 }
101 #else
102
103 static inline void cpuhp_lock_acquire(bool bringup) { }
104 static inline void cpuhp_lock_release(bool bringup) { }
105
106 #endif
107
108 /**
109 * cpuhp_step - Hotplug state machine step
110 * @name: Name of the step
111 * @startup: Startup function of the step
112 * @teardown: Teardown function of the step
113 * @cant_stop: Bringup/teardown can't be stopped at this step
114 */
115 struct cpuhp_step {
116 const char *name;
117 union {
118 int (*single)(unsigned int cpu);
119 int (*multi)(unsigned int cpu,
120 struct hlist_node *node);
121 } startup;
122 union {
123 int (*single)(unsigned int cpu);
124 int (*multi)(unsigned int cpu,
125 struct hlist_node *node);
126 } teardown;
127 struct hlist_head list;
128 bool cant_stop;
129 bool multi_instance;
130 };
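/*
 * Illustrative note (not part of the original documentation): a concrete
 * prepare-stage step as it appears in cpuhp_hp_states[] further down,
 * copied from there so the names are real:
 *
 *	[CPUHP_WORKQUEUE_PREP] = {
 *		.name			= "workqueue:prepare",
 *		.startup.single		= workqueue_prepare_cpu,
 *		.teardown.single	= NULL,
 *	},
 *
 * A multi_instance step uses the .multi members of the unions instead and
 * keeps its registered instances on @list.
 */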
131
132 static DEFINE_MUTEX(cpuhp_state_mutex);
133 static struct cpuhp_step cpuhp_hp_states[];
134
135 static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
136 {
137 return cpuhp_hp_states + state;
138 }
139
140 /**
141 * cpuhp_invoke_callback - Invoke the callbacks for a given state
142 * @cpu: The cpu for which the callback should be invoked
143 * @state: The state to do callbacks for
144 * @bringup: True if the bringup callback should be invoked
145 * @node: For multi-instance, do a single entry callback for install/remove
146 * @lastp: For multi-instance rollback, remember how far we got
147 *
148 * Called from cpu hotplug and from the state register machinery.
149 */
150 static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
151 bool bringup, struct hlist_node *node,
152 struct hlist_node **lastp)
153 {
154 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
155 struct cpuhp_step *step = cpuhp_get_step(state);
156 int (*cbm)(unsigned int cpu, struct hlist_node *node);
157 int (*cb)(unsigned int cpu);
158 int ret, cnt;
159
160 if (st->fail == state) {
161 st->fail = CPUHP_INVALID;
162
163 if (!(bringup ? step->startup.single : step->teardown.single))
164 return 0;
165
166 return -EAGAIN;
167 }
168
169 if (!step->multi_instance) {
170 WARN_ON_ONCE(lastp && *lastp);
171 cb = bringup ? step->startup.single : step->teardown.single;
172 if (!cb)
173 return 0;
174 trace_cpuhp_enter(cpu, st->target, state, cb);
175 ret = cb(cpu);
176 trace_cpuhp_exit(cpu, st->state, state, ret);
177 return ret;
178 }
179 cbm = bringup ? step->startup.multi : step->teardown.multi;
180 if (!cbm)
181 return 0;
182
183 /* Single invocation for instance add/remove */
184 if (node) {
185 WARN_ON_ONCE(lastp && *lastp);
186 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
187 ret = cbm(cpu, node);
188 trace_cpuhp_exit(cpu, st->state, state, ret);
189 return ret;
190 }
191
192 /* State transition. Invoke on all instances */
193 cnt = 0;
194 hlist_for_each(node, &step->list) {
195 if (lastp && node == *lastp)
196 break;
197
198 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
199 ret = cbm(cpu, node);
200 trace_cpuhp_exit(cpu, st->state, state, ret);
201 if (ret) {
202 if (!lastp)
203 goto err;
204
205 *lastp = node;
206 return ret;
207 }
208 cnt++;
209 }
210 if (lastp)
211 *lastp = NULL;
212 return 0;
213 err:
214 /* Rollback the instances if one failed */
215 cbm = !bringup ? step->startup.multi : step->teardown.multi;
216 if (!cbm)
217 return ret;
218
219 hlist_for_each(node, &step->list) {
220 if (!cnt--)
221 break;
222
223 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
224 ret = cbm(cpu, node);
225 trace_cpuhp_exit(cpu, st->state, state, ret);
226 /*
227 * Rollback must not fail.
228 */
229 WARN_ON_ONCE(ret);
230 }
231 return ret;
232 }
233
234 #ifdef CONFIG_SMP
235 static bool cpuhp_is_ap_state(enum cpuhp_state state)
236 {
237 /*
238 * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
239 * purposes as that state is handled explicitly in cpu_down.
240 */
241 return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
242 }
243
244 static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
245 {
246 struct completion *done = bringup ? &st->done_up : &st->done_down;
247 wait_for_completion(done);
248 }
249
250 static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
251 {
252 struct completion *done = bringup ? &st->done_up : &st->done_down;
253 complete(done);
254 }
255
256 /*
257 * The former STARTING/DYING states run with IRQs disabled and must not fail.
258 */
259 static bool cpuhp_is_atomic_state(enum cpuhp_state state)
260 {
261 return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
262 }
263
264 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
265 static DEFINE_MUTEX(cpu_add_remove_lock);
266 bool cpuhp_tasks_frozen;
267 EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
268
269 /*
270 * The following two APIs (cpu_maps_update_begin/done) must be used when
271 * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
272 */
273 void cpu_maps_update_begin(void)
274 {
275 mutex_lock(&cpu_add_remove_lock);
276 }
277
278 void cpu_maps_update_done(void)
279 {
280 mutex_unlock(&cpu_add_remove_lock);
281 }
282
283 /*
284 * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
285 * Should always be manipulated under cpu_add_remove_lock
286 */
287 static int cpu_hotplug_disabled;
288
289 #ifdef CONFIG_HOTPLUG_CPU
290
291 DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
292
293 void cpus_read_lock(void)
294 {
295 percpu_down_read(&cpu_hotplug_lock);
296 }
297 EXPORT_SYMBOL_GPL(cpus_read_lock);
298
299 int cpus_read_trylock(void)
300 {
301 return percpu_down_read_trylock(&cpu_hotplug_lock);
302 }
303 EXPORT_SYMBOL_GPL(cpus_read_trylock);
304
305 void cpus_read_unlock(void)
306 {
307 percpu_up_read(&cpu_hotplug_lock);
308 }
309 EXPORT_SYMBOL_GPL(cpus_read_unlock);
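/*
 * Usage sketch (illustrative, not part of this file): the read side pins
 * cpu_online_mask, so a walk over the online CPUs cannot race with a
 * concurrent hotplug operation. foo_poke() is a hypothetical callback.
 *
 *	static void foo_poke_all_cpus(void)
 *	{
 *		unsigned int cpu;
 *
 *		cpus_read_lock();
 *		for_each_online_cpu(cpu)
 *			smp_call_function_single(cpu, foo_poke, NULL, 1);
 *		cpus_read_unlock();
 *	}
 */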
310
311 void cpus_write_lock(void)
312 {
313 percpu_down_write(&cpu_hotplug_lock);
314 }
315
316 void cpus_write_unlock(void)
317 {
318 percpu_up_write(&cpu_hotplug_lock);
319 }
320
321 void lockdep_assert_cpus_held(void)
322 {
323 /*
324 * We can't have hotplug operations before userspace starts running,
325 * and some init codepaths will knowingly not take the hotplug lock.
326 * This is all valid, so mute lockdep until it makes sense to report
327 * unheld locks.
328 */
329 if (system_state < SYSTEM_RUNNING)
330 return;
331
332 percpu_rwsem_assert_held(&cpu_hotplug_lock);
333 }
334
335 static void lockdep_acquire_cpus_lock(void)
336 {
337 rwsem_acquire(&cpu_hotplug_lock.rw_sem.dep_map, 0, 0, _THIS_IP_);
338 }
339
340 static void lockdep_release_cpus_lock(void)
341 {
342 rwsem_release(&cpu_hotplug_lock.rw_sem.dep_map, 1, _THIS_IP_);
343 }
344
345 /*
346 * Wait for currently running CPU hotplug operations to complete (if any) and
347 * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
348 * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
349 * hotplug path before performing hotplug operations. So acquiring that lock
350 * guarantees mutual exclusion from any currently running hotplug operations.
351 */
352 void cpu_hotplug_disable(void)
353 {
354 cpu_maps_update_begin();
355 cpu_hotplug_disabled++;
356 cpu_maps_update_done();
357 }
358 EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
359
360 static void __cpu_hotplug_enable(void)
361 {
362 if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
363 return;
364 cpu_hotplug_disabled--;
365 }
366
367 void cpu_hotplug_enable(void)
368 {
369 cpu_maps_update_begin();
370 __cpu_hotplug_enable();
371 cpu_maps_update_done();
372 }
373 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
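/*
 * Usage sketch (illustrative, not part of this file): disable/enable nest
 * via the cpu_hotplug_disabled counter, so calls must be strictly balanced.
 * A subsystem that needs the CPU set stable against sysfs-initiated hotplug
 * for a while would do (foo_reconfigure() is hypothetical):
 *
 *	cpu_hotplug_disable();
 *	foo_reconfigure();	[cpu_up()/cpu_down() now return -EBUSY]
 *	cpu_hotplug_enable();
 */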
374
375 #else
376
377 static void lockdep_acquire_cpus_lock(void)
378 {
379 }
380
381 static void lockdep_release_cpus_lock(void)
382 {
383 }
384
385 #endif /* CONFIG_HOTPLUG_CPU */
386
387 /*
388 * Architectures that need SMT-specific errata handling during SMT hotplug
389 * should override this.
390 */
391 void __weak arch_smt_update(void) { }
392
393 #ifdef CONFIG_HOTPLUG_SMT
394 enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
395
396 void __init cpu_smt_disable(bool force)
397 {
398 if (!cpu_smt_possible())
399 return;
400
401 if (force) {
402 pr_info("SMT: Force disabled\n");
403 cpu_smt_control = CPU_SMT_FORCE_DISABLED;
404 } else {
405 pr_info("SMT: disabled\n");
406 cpu_smt_control = CPU_SMT_DISABLED;
407 }
408 }
409
410 /*
411 * The decision whether SMT is supported can only be made after the full
412 * CPU identification. Called from architecture code.
413 */
414 void __init cpu_smt_check_topology(void)
415 {
416 if (!topology_smt_supported())
417 cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
418 }
419
420 static int __init smt_cmdline_disable(char *str)
421 {
422 cpu_smt_disable(str && !strcmp(str, "force"));
423 return 0;
424 }
425 early_param("nosmt", smt_cmdline_disable);
426
427 static inline bool cpu_smt_allowed(unsigned int cpu)
428 {
429 if (cpu_smt_control == CPU_SMT_ENABLED)
430 return true;
431
432 if (topology_is_primary_thread(cpu))
433 return true;
434
435 /*
436 * On x86 it's required to boot all logical CPUs at least once so
437 * that the init code can get a chance to set CR4.MCE on each
438 * CPU. Otherwise, a broadcast MCE observing CR4.MCE=0b on any
439 * core will shut down the machine.
440 */
441 return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
442 }
443
444 /* Returns true if SMT is not supported or forcefully (irreversibly) disabled */
445 bool cpu_smt_possible(void)
446 {
447 return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
448 cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
449 }
450 EXPORT_SYMBOL_GPL(cpu_smt_possible);
451 #else
452 static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
453 #endif
454
455 static inline enum cpuhp_state
456 cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
457 {
458 enum cpuhp_state prev_state = st->state;
459
460 st->rollback = false;
461 st->last = NULL;
462
463 st->target = target;
464 st->single = false;
465 st->bringup = st->state < target;
466
467 return prev_state;
468 }
469
470 static inline void
471 cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
472 {
473 st->rollback = true;
474
475 /*
476 * If we have st->last we need to undo partial multi_instance of this
477 * state first. Otherwise start undo at the previous state.
478 */
479 if (!st->last) {
480 if (st->bringup)
481 st->state--;
482 else
483 st->state++;
484 }
485
486 st->target = prev_state;
487 st->bringup = !st->bringup;
488 }
489
490 /* Regular hotplug invocation of the AP hotplug thread */
491 static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
492 {
493 if (!st->single && st->state == st->target)
494 return;
495
496 st->result = 0;
497 /*
498 * Make sure the above stores are visible before should_run becomes
499 * true. Paired with the smp_mb() in cpuhp_thread_fun()
500 */
501 smp_mb();
502 st->should_run = true;
503 wake_up_process(st->thread);
504 wait_for_ap_thread(st, st->bringup);
505 }
506
507 static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
508 {
509 enum cpuhp_state prev_state;
510 int ret;
511
512 prev_state = cpuhp_set_state(st, target);
513 __cpuhp_kick_ap(st);
514 if ((ret = st->result)) {
515 cpuhp_reset_state(st, prev_state);
516 __cpuhp_kick_ap(st);
517 }
518
519 return ret;
520 }
521
522 static int bringup_wait_for_ap(unsigned int cpu)
523 {
524 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
525
526 /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
527 wait_for_ap_thread(st, true);
528 if (WARN_ON_ONCE((!cpu_online(cpu))))
529 return -ECANCELED;
530
531 /* Unpark the hotplug thread of the target cpu */
532 kthread_unpark(st->thread);
533
534 /*
535 * SMT soft disabling on X86 requires to bring the CPU out of the
536 * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The
537 * CPU marked itself as booted_once in notify_cpu_starting() so the
538 * cpu_smt_allowed() check will now return false if this is not the
539 * primary sibling.
540 */
541 if (!cpu_smt_allowed(cpu))
542 return -ECANCELED;
543
544 if (st->target <= CPUHP_AP_ONLINE_IDLE)
545 return 0;
546
547 return cpuhp_kick_ap(st, st->target);
548 }
549
550 static int bringup_cpu(unsigned int cpu)
551 {
552 struct task_struct *idle = idle_thread_get(cpu);
553 int ret;
554
555 /*
556 * Some architectures have to walk the irq descriptors to
557 * setup the vector space for the cpu which comes online.
558 * Prevent irq alloc/free across the bringup.
559 */
560 irq_lock_sparse();
561
562 /* Arch-specific enabling code. */
563 ret = __cpu_up(cpu, idle);
564 irq_unlock_sparse();
565 if (ret)
566 return ret;
567 return bringup_wait_for_ap(cpu);
568 }
569
570 static int finish_cpu(unsigned int cpu)
571 {
572 struct task_struct *idle = idle_thread_get(cpu);
573 struct mm_struct *mm = idle->active_mm;
574
575 /*
576 * idle_task_exit() will have switched to &init_mm, now
577 * clean up any remaining active_mm state.
578 */
579 if (mm != &init_mm)
580 idle->active_mm = &init_mm;
581 mmdrop(mm);
582 return 0;
583 }
584
585 /*
586 * Hotplug state machine related functions
587 */
588
589 static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
590 {
591 for (st->state--; st->state > st->target; st->state--)
592 cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
593 }
594
595 static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
596 {
597 if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
598 return true;
599 /*
600 * When CPU hotplug is disabled, then taking the CPU down is not
601 * possible because takedown_cpu() and the architecture and
602 * subsystem specific mechanisms are not available. So the CPU
603 * which would be completely unplugged again needs to stay around
604 * in the current state.
605 */
606 return st->state <= CPUHP_BRINGUP_CPU;
607 }
608
609 static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
610 enum cpuhp_state target)
611 {
612 enum cpuhp_state prev_state = st->state;
613 int ret = 0;
614
615 while (st->state < target) {
616 st->state++;
617 ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
618 if (ret) {
619 if (can_rollback_cpu(st)) {
620 st->target = prev_state;
621 undo_cpu_up(cpu, st);
622 }
623 break;
624 }
625 }
626 return ret;
627 }
628
629 /*
630 * The cpu hotplug threads manage the bringup and teardown of the cpus
631 */
632 static void cpuhp_create(unsigned int cpu)
633 {
634 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
635
636 init_completion(&st->done_up);
637 init_completion(&st->done_down);
638 }
639
640 static int cpuhp_should_run(unsigned int cpu)
641 {
642 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
643
644 return st->should_run;
645 }
646
647 /*
648 * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
649 * callbacks when a state gets [un]installed at runtime.
650 *
651 * Each invocation of this function by the smpboot thread does a single AP
652 * state callback.
653 *
654 * It has 3 modes of operation:
655 * - single: runs st->cb_state
656 * - up: runs ++st->state, while st->state < st->target
657 * - down: runs st->state--, while st->state > st->target
658 *
659 * When complete or on error, should_run is cleared and the completion is fired.
660 */
661 static void cpuhp_thread_fun(unsigned int cpu)
662 {
663 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
664 bool bringup = st->bringup;
665 enum cpuhp_state state;
666
667 if (WARN_ON_ONCE(!st->should_run))
668 return;
669
670 /*
671 * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
672 * that if we see ->should_run we also see the rest of the state.
673 */
674 smp_mb();
675
676 /*
677 * The BP holds the hotplug lock, but we're now running on the AP,
678 * ensure that anybody asserting the lock is held, will actually find
679 * it so.
680 */
681 lockdep_acquire_cpus_lock();
682 cpuhp_lock_acquire(bringup);
683
684 if (st->single) {
685 state = st->cb_state;
686 st->should_run = false;
687 } else {
688 if (bringup) {
689 st->state++;
690 state = st->state;
691 st->should_run = (st->state < st->target);
692 WARN_ON_ONCE(st->state > st->target);
693 } else {
694 state = st->state;
695 st->state--;
696 st->should_run = (st->state > st->target);
697 WARN_ON_ONCE(st->state < st->target);
698 }
699 }
700
701 WARN_ON_ONCE(!cpuhp_is_ap_state(state));
702
703 if (cpuhp_is_atomic_state(state)) {
704 local_irq_disable();
705 st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
706 local_irq_enable();
707
708 /*
709 * STARTING/DYING must not fail!
710 */
711 WARN_ON_ONCE(st->result);
712 } else {
713 st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
714 }
715
716 if (st->result) {
717 /*
718 * If we fail on a rollback, we're up a creek without a
719 * paddle; no way forward, no way back. We lose, thanks for
720 * playing.
721 */
722 WARN_ON_ONCE(st->rollback);
723 st->should_run = false;
724 }
725
726 cpuhp_lock_release(bringup);
727 lockdep_release_cpus_lock();
728
729 if (!st->should_run)
730 complete_ap_thread(st, bringup);
731 }
732
733 /* Invoke a single callback on a remote cpu */
734 static int
735 cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
736 struct hlist_node *node)
737 {
738 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
739 int ret;
740
741 if (!cpu_online(cpu))
742 return 0;
743
744 cpuhp_lock_acquire(false);
745 cpuhp_lock_release(false);
746
747 cpuhp_lock_acquire(true);
748 cpuhp_lock_release(true);
749
750 /*
751 * If we are up and running, use the hotplug thread. For early calls
752 * we invoke the thread function directly.
753 */
754 if (!st->thread)
755 return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
756
757 st->rollback = false;
758 st->last = NULL;
759
760 st->node = node;
761 st->bringup = bringup;
762 st->cb_state = state;
763 st->single = true;
764
765 __cpuhp_kick_ap(st);
766
767 /*
768 * If we failed and did a partial, do a rollback.
769 */
770 if ((ret = st->result) && st->last) {
771 st->rollback = true;
772 st->bringup = !bringup;
773
774 __cpuhp_kick_ap(st);
775 }
776
777 /*
778 * Clean up the leftovers so the next hotplug operation won't use stale
779 * data.
780 */
781 st->node = st->last = NULL;
782 return ret;
783 }
784
785 static int cpuhp_kick_ap_work(unsigned int cpu)
786 {
787 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
788 enum cpuhp_state prev_state = st->state;
789 int ret;
790
791 cpuhp_lock_acquire(false);
792 cpuhp_lock_release(false);
793
794 cpuhp_lock_acquire(true);
795 cpuhp_lock_release(true);
796
797 trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
798 ret = cpuhp_kick_ap(st, st->target);
799 trace_cpuhp_exit(cpu, st->state, prev_state, ret);
800
801 return ret;
802 }
803
804 static struct smp_hotplug_thread cpuhp_threads = {
805 .store = &cpuhp_state.thread,
806 .create = &cpuhp_create,
807 .thread_should_run = cpuhp_should_run,
808 .thread_fn = cpuhp_thread_fun,
809 .thread_comm = "cpuhp/%u",
810 .selfparking = true,
811 };
812
813 void __init cpuhp_threads_init(void)
814 {
815 BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
816 kthread_unpark(this_cpu_read(cpuhp_state.thread));
817 }
818
819 /*
820 *
821 * Serialize hotplug trainwrecks outside of the cpu_hotplug_lock
822 * protected region.
823 *
824 * The operation is still serialized against concurrent CPU hotplug via
825 * cpu_add_remove_lock, i.e. CPU map protection. But it is _not_
826 * serialized against other hotplug related activity like adding or
827 * removing of state callbacks and state instances, which invoke either the
828 * startup or the teardown callback of the affected state.
829 *
830 * This is required for subsystems which are unfixable vs. CPU hotplug and
831 * evade lock inversion problems by scheduling work which has to be
832 * completed _before_ cpu_up()/_cpu_down() returns.
833 *
834 * Don't even think about adding anything to this for any new code or even
835 * drivers. Its only purpose is to keep existing lock order trainwrecks
836 * working.
837 *
838 * For cpu_down() there might be valid reasons to finish cleanups which are
839 * not required to be done under cpu_hotplug_lock, but that's a different
840 * story and would be not invoked via this.
841 */
842 static void cpu_up_down_serialize_trainwrecks(bool tasks_frozen)
843 {
844 /*
845 * cpusets delegate hotplug operations to a worker to "solve" the
846 * lock order problems. Wait for the worker, but only if tasks are
847 * _not_ frozen (suspend, hibernate) as that would wait forever.
848 *
849 * The wait is required because otherwise the hotplug operation
850 * returns with inconsistent state, which could even be observed in
851 * user space when a new CPU is brought up. The CPU plug uevent
852 * would be delivered and user space reacting on it would fail to
853 * move tasks to the newly plugged CPU up to the point where the
854 * work has finished because up to that point the newly plugged CPU
855 * is not assignable in cpusets/cgroups. On unplug that's not
856 * necessarily a visible issue, but it is still inconsistent state,
857 * which is the real problem which needs to be "fixed". This can't
858 * prevent the transient state between scheduling the work and
859 * returning from waiting for it.
860 */
861 if (!tasks_frozen)
862 cpuset_wait_for_hotplug();
863 }
864
865 #ifdef CONFIG_HOTPLUG_CPU
866 #ifndef arch_clear_mm_cpumask_cpu
867 #define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm))
868 #endif
869
870 /**
871 * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
872 * @cpu: a CPU id
873 *
874 * This function walks all processes, finds a valid mm struct for each one and
875 * then clears a corresponding bit in mm's cpumask. While this all sounds
876 * trivial, there are various non-obvious corner cases, which this function
877 * tries to solve in a safe manner.
878 *
879 * Also note that the function uses a somewhat relaxed locking scheme, so it may
880 * be called only for an already offlined CPU.
881 */
882 void clear_tasks_mm_cpumask(int cpu)
883 {
884 struct task_struct *p;
885
886 /*
887 * This function is called after the cpu is taken down and marked
888 * offline, so it's not like new tasks will ever get this cpu set in
889 * their mm mask. -- Peter Zijlstra
890 * Thus, we may use rcu_read_lock() here, instead of grabbing
891 * full-fledged tasklist_lock.
892 */
893 WARN_ON(cpu_online(cpu));
894 rcu_read_lock();
895 for_each_process(p) {
896 struct task_struct *t;
897
898 /*
899 * Main thread might exit, but other threads may still have
900 * a valid mm. Find one.
901 */
902 t = find_lock_task_mm(p);
903 if (!t)
904 continue;
905 arch_clear_mm_cpumask_cpu(cpu, t->mm);
906 task_unlock(t);
907 }
908 rcu_read_unlock();
909 }
910
911 /* Take this CPU down. */
912 static int take_cpu_down(void *_param)
913 {
914 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
915 enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
916 int err, cpu = smp_processor_id();
917 int ret;
918
919 /* Ensure this CPU doesn't handle any more interrupts. */
920 err = __cpu_disable();
921 if (err < 0)
922 return err;
923
924 /*
925 * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
926 * do this step again.
927 */
928 WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
929 st->state--;
930 /* Invoke the former CPU_DYING callbacks */
931 for (; st->state > target; st->state--) {
932 ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
933 /*
934 * DYING must not fail!
935 */
936 WARN_ON_ONCE(ret);
937 }
938
939 /* Give up timekeeping duties */
940 tick_handover_do_timer();
941 /* Remove CPU from timer broadcasting */
942 tick_offline_cpu(cpu);
943 /* Park the stopper thread */
944 stop_machine_park(cpu);
945 return 0;
946 }
947
948 static int takedown_cpu(unsigned int cpu)
949 {
950 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
951 int err;
952
953 /* Park the smpboot threads */
954 kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
955
956 /*
957 * Prevent irq alloc/free while the dying cpu reorganizes the
958 * interrupt affinities.
959 */
960 irq_lock_sparse();
961
962 /*
963 * So now all preempt/rcu users must observe !cpu_active().
964 */
965 err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
966 if (err) {
967 /* CPU refused to die */
968 irq_unlock_sparse();
969 /* Unpark the hotplug thread so we can rollback there */
970 kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
971 return err;
972 }
973 BUG_ON(cpu_online(cpu));
974
975 /*
976 * The teardown callback for CPUHP_AP_SCHED_STARTING will have removed
977 * all runnable tasks from the CPU, there's only the idle task left now
978 * that the migration thread is done doing the stop_machine thing.
979 *
980 * Wait for the stop thread to go away.
981 */
982 wait_for_ap_thread(st, false);
983 BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
984
985 /* Interrupts are moved away from the dying cpu, reenable alloc/free */
986 irq_unlock_sparse();
987
988 hotplug_cpu__broadcast_tick_pull(cpu);
989 /* This actually kills the CPU. */
990 __cpu_die(cpu);
991
992 tick_cleanup_dead_cpu(cpu);
993 rcutree_migrate_callbacks(cpu);
994 return 0;
995 }
996
997 static void cpuhp_complete_idle_dead(void *arg)
998 {
999 struct cpuhp_cpu_state *st = arg;
1000
1001 complete_ap_thread(st, false);
1002 }
1003
1004 void cpuhp_report_idle_dead(void)
1005 {
1006 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1007
1008 BUG_ON(st->state != CPUHP_AP_OFFLINE);
1009 rcu_report_dead(smp_processor_id());
1010 st->state = CPUHP_AP_IDLE_DEAD;
1011 /*
1012 * We cannot call complete after rcu_report_dead() so we delegate it
1013 * to an online cpu.
1014 */
1015 smp_call_function_single(cpumask_first(cpu_online_mask),
1016 cpuhp_complete_idle_dead, st, 0);
1017 }
1018
1019 static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
1020 {
1021 for (st->state++; st->state < st->target; st->state++)
1022 cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
1023 }
1024
1025 static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
1026 enum cpuhp_state target)
1027 {
1028 enum cpuhp_state prev_state = st->state;
1029 int ret = 0;
1030
1031 for (; st->state > target; st->state--) {
1032 ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
1033 if (ret) {
1034 st->target = prev_state;
1035 if (st->state < prev_state)
1036 undo_cpu_down(cpu, st);
1037 break;
1038 }
1039 }
1040 return ret;
1041 }
1042
1043 /* Requires cpu_add_remove_lock to be held */
1044 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
1045 enum cpuhp_state target)
1046 {
1047 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1048 int prev_state, ret = 0;
1049
1050 if (num_online_cpus() == 1)
1051 return -EBUSY;
1052
1053 if (!cpu_present(cpu))
1054 return -EINVAL;
1055
1056 cpus_write_lock();
1057
1058 cpuhp_tasks_frozen = tasks_frozen;
1059
1060 prev_state = cpuhp_set_state(st, target);
1061 /*
1062 * If the current CPU state is in the range of the AP hotplug thread,
1063 * then we need to kick the thread.
1064 */
1065 if (st->state > CPUHP_TEARDOWN_CPU) {
1066 st->target = max((int)target, CPUHP_TEARDOWN_CPU);
1067 ret = cpuhp_kick_ap_work(cpu);
1068 /*
1069 * The AP side has done the error rollback already. Just
1070 * return the error code.
1071 */
1072 if (ret)
1073 goto out;
1074
1075 /*
1076 * We might have stopped still in the range of the AP hotplug
1077 * thread. Nothing to do anymore.
1078 */
1079 if (st->state > CPUHP_TEARDOWN_CPU)
1080 goto out;
1081
1082 st->target = target;
1083 }
1084 /*
1085 * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
1086 * to do the further cleanups.
1087 */
1088 ret = cpuhp_down_callbacks(cpu, st, target);
1089 if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) {
1090 cpuhp_reset_state(st, prev_state);
1091 __cpuhp_kick_ap(st);
1092 }
1093
1094 out:
1095 cpus_write_unlock();
1096 /*
1097 * Do post unplug cleanup. This is still protected against
1098 * concurrent CPU hotplug via cpu_add_remove_lock.
1099 */
1100 lockup_detector_cleanup();
1101 arch_smt_update();
1102 cpu_up_down_serialize_trainwrecks(tasks_frozen);
1103 return ret;
1104 }
1105
1106 static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
1107 {
1108 if (cpu_hotplug_disabled)
1109 return -EBUSY;
1110 return _cpu_down(cpu, 0, target);
1111 }
1112
1113 static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
1114 {
1115 int err;
1116
1117 cpu_maps_update_begin();
1118 err = cpu_down_maps_locked(cpu, target);
1119 cpu_maps_update_done();
1120 return err;
1121 }
1122
1123 int cpu_down(unsigned int cpu)
1124 {
1125 return do_cpu_down(cpu, CPUHP_OFFLINE);
1126 }
1127 EXPORT_SYMBOL(cpu_down);
1128
1129 #else
1130 #define takedown_cpu NULL
1131 #endif /*CONFIG_HOTPLUG_CPU*/
1132
1133 /**
1134 * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
1135 * @cpu: cpu that just started
1136 *
1137 * It must be called by the arch code on the new cpu, before the new cpu
1138 * enables interrupts and before the "boot" cpu returns from __cpu_up().
1139 */
1140 void notify_cpu_starting(unsigned int cpu)
1141 {
1142 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1143 enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
1144 int ret;
1145
1146 rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
1147 cpumask_set_cpu(cpu, &cpus_booted_once_mask);
1148 while (st->state < target) {
1149 st->state++;
1150 ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
1151 /*
1152 * STARTING must not fail!
1153 */
1154 WARN_ON_ONCE(ret);
1155 }
1156 }
1157
1158 /*
1159 * Called from the idle task. Wake up the controlling task which brings the
1160 * hotplug thread of the upcoming CPU up and then delegates the rest of the
1161 * online bringup to the hotplug thread.
1162 */
1163 void cpuhp_online_idle(enum cpuhp_state state)
1164 {
1165 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1166
1167 /* Happens for the boot cpu */
1168 if (state != CPUHP_AP_ONLINE_IDLE)
1169 return;
1170
1171 /*
1172 * Unpark the stopper thread before we start the idle loop (and start
1173 * scheduling); this ensures the stopper task is always available.
1174 */
1175 stop_machine_unpark(smp_processor_id());
1176
1177 st->state = CPUHP_AP_ONLINE_IDLE;
1178 complete_ap_thread(st, true);
1179 }
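/*
 * Illustrative sketch (not part of this file) of how the two hooks above
 * line up on a typical architecture's secondary startup path. The function
 * name foo_secondary_start_kernel() is hypothetical; the calls are the
 * real ones:
 *
 *	void foo_secondary_start_kernel(void)
 *	{
 *		[low level init of the freshly booted CPU]
 *		notify_cpu_starting(cpu);	[STARTING callbacks, IRQs off]
 *		set_cpu_online(cpu, true);
 *		local_irq_enable();
 *		cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 *			[reaches cpuhp_online_idle() and then the idle loop]
 *	}
 */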
1180
1181 /* Requires cpu_add_remove_lock to be held */
1182 static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
1183 {
1184 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1185 struct task_struct *idle;
1186 int ret = 0;
1187
1188 cpus_write_lock();
1189
1190 if (!cpu_present(cpu)) {
1191 ret = -EINVAL;
1192 goto out;
1193 }
1194
1195 /*
1196 * The caller of do_cpu_up might have raced with another
1197 * caller. Ignore it for now.
1198 */
1199 if (st->state >= target)
1200 goto out;
1201
1202 if (st->state == CPUHP_OFFLINE) {
1203 /* Let it fail before we try to bring the cpu up */
1204 idle = idle_thread_get(cpu);
1205 if (IS_ERR(idle)) {
1206 ret = PTR_ERR(idle);
1207 goto out;
1208 }
1209 }
1210
1211 cpuhp_tasks_frozen = tasks_frozen;
1212
1213 cpuhp_set_state(st, target);
1214 /*
1215 * If the current CPU state is in the range of the AP hotplug thread,
1216 * then we need to kick the thread once more.
1217 */
1218 if (st->state > CPUHP_BRINGUP_CPU) {
1219 ret = cpuhp_kick_ap_work(cpu);
1220 /*
1221 * The AP side has done the error rollback already. Just
1222 * return the error code.
1223 */
1224 if (ret)
1225 goto out;
1226 }
1227
1228 /*
1229 * Try to reach the target state. We max out on the BP at
1230 * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
1231 * responsible for bringing it up to the target state.
1232 */
1233 target = min((int)target, CPUHP_BRINGUP_CPU);
1234 ret = cpuhp_up_callbacks(cpu, st, target);
1235 out:
1236 cpus_write_unlock();
1237 arch_smt_update();
1238 cpu_up_down_serialize_trainwrecks(tasks_frozen);
1239 return ret;
1240 }
1241
1242 static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
1243 {
1244 int err = 0;
1245
1246 if (!cpu_possible(cpu)) {
1247 pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
1248 cpu);
1249 #if defined(CONFIG_IA64)
1250 pr_err("please check additional_cpus= boot parameter\n");
1251 #endif
1252 return -EINVAL;
1253 }
1254
1255 err = try_online_node(cpu_to_node(cpu));
1256 if (err)
1257 return err;
1258
1259 cpu_maps_update_begin();
1260
1261 if (cpu_hotplug_disabled) {
1262 err = -EBUSY;
1263 goto out;
1264 }
1265 if (!cpu_smt_allowed(cpu)) {
1266 err = -EPERM;
1267 goto out;
1268 }
1269
1270 err = _cpu_up(cpu, 0, target);
1271 out:
1272 cpu_maps_update_done();
1273 return err;
1274 }
1275
1276 int cpu_up(unsigned int cpu)
1277 {
1278 return do_cpu_up(cpu, CPUHP_ONLINE);
1279 }
1280 EXPORT_SYMBOL_GPL(cpu_up);
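/*
 * Usage sketch (illustrative, not part of this file): cpu_up()/cpu_down()
 * drive a CPU through the full state range and return 0 or a negative
 * error code from the state machine:
 *
 *	int err = cpu_down(cpu);	[offline down to CPUHP_OFFLINE]
 *	if (err)
 *		pr_err("CPU%u offline failed: %d\n", cpu, err);
 *	...
 *	err = cpu_up(cpu);		[online up to CPUHP_ONLINE]
 *
 * Both return -EBUSY while cpu_hotplug_disabled is set and must not be
 * called with cpu_hotplug_lock held, since they take it for writing.
 */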
1281
1282 #ifdef CONFIG_PM_SLEEP_SMP
1283 static cpumask_var_t frozen_cpus;
1284
1285 int __freeze_secondary_cpus(int primary, bool suspend)
1286 {
1287 int cpu, error = 0;
1288
1289 cpu_maps_update_begin();
1290 if (primary == -1) {
1291 primary = cpumask_first(cpu_online_mask);
1292 if (!housekeeping_cpu(primary, HK_FLAG_TIMER))
1293 primary = housekeeping_any_cpu(HK_FLAG_TIMER);
1294 } else {
1295 if (!cpu_online(primary))
1296 primary = cpumask_first(cpu_online_mask);
1297 }
1298
1299 /*
1300 * We take down all of the non-boot CPUs in one shot to avoid races
1301 * with userspace trying to use CPU hotplug at the same time.
1302 */
1303 cpumask_clear(frozen_cpus);
1304
1305 pr_info("Disabling non-boot CPUs ...\n");
1306 for_each_online_cpu(cpu) {
1307 if (cpu == primary)
1308 continue;
1309
1310 if (suspend && pm_wakeup_pending()) {
1311 pr_info("Wakeup pending. Abort CPU freeze\n");
1312 error = -EBUSY;
1313 break;
1314 }
1315
1316 trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
1317 error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
1318 trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
1319 if (!error)
1320 cpumask_set_cpu(cpu, frozen_cpus);
1321 else {
1322 pr_err("Error taking CPU%d down: %d\n", cpu, error);
1323 break;
1324 }
1325 }
1326
1327 if (!error)
1328 BUG_ON(num_online_cpus() > 1);
1329 else
1330 pr_err("Non-boot CPUs are not disabled\n");
1331
1332 /*
1333 * Make sure the CPUs won't be enabled by someone else. We need to do
1334 * this even in case of failure as all disable_nonboot_cpus() users are
1335 * supposed to do enable_nonboot_cpus() on the failure path.
1336 */
1337 cpu_hotplug_disabled++;
1338
1339 cpu_maps_update_done();
1340 return error;
1341 }
1342
1343 void __weak arch_enable_nonboot_cpus_begin(void)
1344 {
1345 }
1346
1347 void __weak arch_enable_nonboot_cpus_end(void)
1348 {
1349 }
1350
1351 void enable_nonboot_cpus(void)
1352 {
1353 int cpu, error;
1354 struct device *cpu_device;
1355
1356 /* Allow everyone to use the CPU hotplug again */
1357 cpu_maps_update_begin();
1358 __cpu_hotplug_enable();
1359 if (cpumask_empty(frozen_cpus))
1360 goto out;
1361
1362 pr_info("Enabling non-boot CPUs ...\n");
1363
1364 arch_enable_nonboot_cpus_begin();
1365
1366 for_each_cpu(cpu, frozen_cpus) {
1367 trace_suspend_resume(TPS("CPU_ON"), cpu, true);
1368 error = _cpu_up(cpu, 1, CPUHP_ONLINE);
1369 trace_suspend_resume(TPS("CPU_ON"), cpu, false);
1370 if (!error) {
1371 pr_info("CPU%d is up\n", cpu);
1372 cpu_device = get_cpu_device(cpu);
1373 if (!cpu_device)
1374 pr_err("%s: failed to get cpu%d device\n",
1375 __func__, cpu);
1376 else
1377 kobject_uevent(&cpu_device->kobj, KOBJ_ONLINE);
1378 continue;
1379 }
1380 pr_warn("Error taking CPU%d up: %d\n", cpu, error);
1381 }
1382
1383 arch_enable_nonboot_cpus_end();
1384
1385 cpumask_clear(frozen_cpus);
1386 out:
1387 cpu_maps_update_done();
1388 }
1389
1390 static int __init alloc_frozen_cpus(void)
1391 {
1392 if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
1393 return -ENOMEM;
1394 return 0;
1395 }
1396 core_initcall(alloc_frozen_cpus);
1397
1398 /*
1399 * When callbacks for CPU hotplug notifications are being executed, we must
1400 * ensure that the state of the system with respect to the tasks being frozen
1401 * or not, as reported by the notification, remains unchanged *throughout the
1402 * duration* of the execution of the callbacks.
1403 * Hence we need to prevent the freezer from racing with regular CPU hotplug.
1404 *
1405 * This synchronization is implemented by mutually excluding regular CPU
1406 * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
1407 * Hibernate notifications.
1408 */
1409 static int
1410 cpu_hotplug_pm_callback(struct notifier_block *nb,
1411 unsigned long action, void *ptr)
1412 {
1413 switch (action) {
1414
1415 case PM_SUSPEND_PREPARE:
1416 case PM_HIBERNATION_PREPARE:
1417 cpu_hotplug_disable();
1418 break;
1419
1420 case PM_POST_SUSPEND:
1421 case PM_POST_HIBERNATION:
1422 cpu_hotplug_enable();
1423 break;
1424
1425 default:
1426 return NOTIFY_DONE;
1427 }
1428
1429 return NOTIFY_OK;
1430 }
1431
1432
1433 static int __init cpu_hotplug_pm_sync_init(void)
1434 {
1435 /*
1436 * cpu_hotplug_pm_callback has higher priority than x86
1437 * bsp_pm_callback which depends on cpu_hotplug_pm_callback
1438 * to disable cpu hotplug to avoid cpu hotplug race.
1439 */
1440 pm_notifier(cpu_hotplug_pm_callback, 0);
1441 return 0;
1442 }
1443 core_initcall(cpu_hotplug_pm_sync_init);
1444
1445 #endif /* CONFIG_PM_SLEEP_SMP */
1446
1447 int __boot_cpu_id;
1448
1449 /* Horrific hacks because we can't add more to cpuhp_hp_states. */
1450 static int random_and_perf_prepare_fusion(unsigned int cpu)
1451 {
1452 #ifdef CONFIG_PERF_EVENTS
1453 perf_event_init_cpu(cpu);
1454 #endif
1455 random_prepare_cpu(cpu);
1456 return 0;
1457 }
1458 static int random_and_workqueue_online_fusion(unsigned int cpu)
1459 {
1460 workqueue_online_cpu(cpu);
1461 random_online_cpu(cpu);
1462 return 0;
1463 }
1464
1465 #endif /* CONFIG_SMP */
1466
1467 /* Boot processor state steps */
1468 static struct cpuhp_step cpuhp_hp_states[] = {
1469 [CPUHP_OFFLINE] = {
1470 .name = "offline",
1471 .startup.single = NULL,
1472 .teardown.single = NULL,
1473 },
1474 #ifdef CONFIG_SMP
1475 [CPUHP_CREATE_THREADS]= {
1476 .name = "threads:prepare",
1477 .startup.single = smpboot_create_threads,
1478 .teardown.single = NULL,
1479 .cant_stop = true,
1480 },
1481 [CPUHP_PERF_PREPARE] = {
1482 .name = "perf:prepare",
1483 .startup.single = random_and_perf_prepare_fusion,
1484 .teardown.single = perf_event_exit_cpu,
1485 },
1486 [CPUHP_WORKQUEUE_PREP] = {
1487 .name = "workqueue:prepare",
1488 .startup.single = workqueue_prepare_cpu,
1489 .teardown.single = NULL,
1490 },
1491 [CPUHP_HRTIMERS_PREPARE] = {
1492 .name = "hrtimers:prepare",
1493 .startup.single = hrtimers_prepare_cpu,
1494 .teardown.single = hrtimers_dead_cpu,
1495 },
1496 [CPUHP_SMPCFD_PREPARE] = {
1497 .name = "smpcfd:prepare",
1498 .startup.single = smpcfd_prepare_cpu,
1499 .teardown.single = smpcfd_dead_cpu,
1500 },
1501 [CPUHP_RELAY_PREPARE] = {
1502 .name = "relay:prepare",
1503 .startup.single = relay_prepare_cpu,
1504 .teardown.single = NULL,
1505 },
1506 [CPUHP_SLAB_PREPARE] = {
1507 .name = "slab:prepare",
1508 .startup.single = slab_prepare_cpu,
1509 .teardown.single = slab_dead_cpu,
1510 },
1511 [CPUHP_RCUTREE_PREP] = {
1512 .name = "RCU/tree:prepare",
1513 .startup.single = rcutree_prepare_cpu,
1514 .teardown.single = rcutree_dead_cpu,
1515 },
1516 /*
1517 * On the tear-down path, timers_dead_cpu() must be invoked
1518 * before blk_mq_queue_reinit_notify() from notify_dead(),
1519 * otherwise an RCU stall occurs.
1520 */
1521 [CPUHP_TIMERS_PREPARE] = {
1522 .name = "timers:prepare",
1523 .startup.single = timers_prepare_cpu,
1524 .teardown.single = timers_dead_cpu,
1525 },
1526 /* Kicks the plugged cpu into life */
1527 [CPUHP_BRINGUP_CPU] = {
1528 .name = "cpu:bringup",
1529 .startup.single = bringup_cpu,
1530 .teardown.single = finish_cpu,
1531 .cant_stop = true,
1532 },
1533 /* Final state before CPU kills itself */
1534 [CPUHP_AP_IDLE_DEAD] = {
1535 .name = "idle:dead",
1536 },
1537 /*
1538 * Last state before CPU enters the idle loop to die. Transient state
1539 * for synchronization.
1540 */
1541 [CPUHP_AP_OFFLINE] = {
1542 .name = "ap:offline",
1543 .cant_stop = true,
1544 },
1545 /* First state is scheduler control. Interrupts are disabled */
1546 [CPUHP_AP_SCHED_STARTING] = {
1547 .name = "sched:starting",
1548 .startup.single = sched_cpu_starting,
1549 .teardown.single = sched_cpu_dying,
1550 },
1551 [CPUHP_AP_RCUTREE_DYING] = {
1552 .name = "RCU/tree:dying",
1553 .startup.single = NULL,
1554 .teardown.single = rcutree_dying_cpu,
1555 },
1556 [CPUHP_AP_SMPCFD_DYING] = {
1557 .name = "smpcfd:dying",
1558 .startup.single = NULL,
1559 .teardown.single = smpcfd_dying_cpu,
1560 },
1561 /* Entry state on starting. Interrupts enabled from here on. Transient
1562 * state for synchronization */
1563 [CPUHP_AP_ONLINE] = {
1564 .name = "ap:online",
1565 },
1566 /*
1567 * Handled on the control processor until the plugged processor manages
1568 * this itself.
1569 */
1570 [CPUHP_TEARDOWN_CPU] = {
1571 .name = "cpu:teardown",
1572 .startup.single = NULL,
1573 .teardown.single = takedown_cpu,
1574 .cant_stop = true,
1575 },
1576 /* Handle smpboot threads park/unpark */
1577 [CPUHP_AP_SMPBOOT_THREADS] = {
1578 .name = "smpboot/threads:online",
1579 .startup.single = smpboot_unpark_threads,
1580 .teardown.single = smpboot_park_threads,
1581 },
1582 [CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
1583 .name = "irq/affinity:online",
1584 .startup.single = irq_affinity_online_cpu,
1585 .teardown.single = NULL,
1586 },
1587 [CPUHP_AP_PERF_ONLINE] = {
1588 .name = "perf:online",
1589 .startup.single = perf_event_init_cpu,
1590 .teardown.single = perf_event_exit_cpu,
1591 },
1592 [CPUHP_AP_WATCHDOG_ONLINE] = {
1593 .name = "lockup_detector:online",
1594 .startup.single = lockup_detector_online_cpu,
1595 .teardown.single = lockup_detector_offline_cpu,
1596 },
1597 [CPUHP_AP_WORKQUEUE_ONLINE] = {
1598 .name = "workqueue:online",
1599 .startup.single = random_and_workqueue_online_fusion,
1600 .teardown.single = workqueue_offline_cpu,
1601 },
1602 [CPUHP_AP_RCUTREE_ONLINE] = {
1603 .name = "RCU/tree:online",
1604 .startup.single = rcutree_online_cpu,
1605 .teardown.single = rcutree_offline_cpu,
1606 },
1607 #endif
1608 /*
1609 * The dynamically registered state space is here
1610 */
1611
1612 #ifdef CONFIG_SMP
1613 /* Last state is scheduler control setting the cpu active */
1614 [CPUHP_AP_ACTIVE] = {
1615 .name = "sched:active",
1616 .startup.single = sched_cpu_activate,
1617 .teardown.single = sched_cpu_deactivate,
1618 },
1619 #endif
1620
1621 /* CPU is fully up and running. */
1622 [CPUHP_ONLINE] = {
1623 .name = "online",
1624 .startup.single = NULL,
1625 .teardown.single = NULL,
1626 },
1627 };
1628
1629 /* Sanity check for callbacks */
1630 static int cpuhp_cb_check(enum cpuhp_state state)
1631 {
1632 if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
1633 return -EINVAL;
1634 return 0;
1635 }
1636
1637 /*
1638 * Returns a free slot for dynamic state assignment (in the Online or
1639 * Prepare range). The states are protected by the cpuhp_state_mutex and an
1640 * empty slot is identified by having no name assigned.
1641 */
1642 static int cpuhp_reserve_state(enum cpuhp_state state)
1643 {
1644 enum cpuhp_state i, end;
1645 struct cpuhp_step *step;
1646
1647 switch (state) {
1648 case CPUHP_AP_ONLINE_DYN:
1649 step = cpuhp_hp_states + CPUHP_AP_ONLINE_DYN;
1650 end = CPUHP_AP_ONLINE_DYN_END;
1651 break;
1652 case CPUHP_BP_PREPARE_DYN:
1653 step = cpuhp_hp_states + CPUHP_BP_PREPARE_DYN;
1654 end = CPUHP_BP_PREPARE_DYN_END;
1655 break;
1656 default:
1657 return -EINVAL;
1658 }
1659
1660 for (i = state; i <= end; i++, step++) {
1661 if (!step->name)
1662 return i;
1663 }
1664 WARN(1, "No more dynamic states available for CPU hotplug\n");
1665 return -ENOSPC;
1666 }
1667
1668 static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
1669 int (*startup)(unsigned int cpu),
1670 int (*teardown)(unsigned int cpu),
1671 bool multi_instance)
1672 {
1673 /* (Un)Install the callbacks for further cpu hotplug operations */
1674 struct cpuhp_step *sp;
1675 int ret = 0;
1676
1677 /*
1678 * If name is NULL, then the state gets removed.
1679 *
1680 * CPUHP_AP_ONLINE_DYN and CPUHP_BP_PREPARE_DYN are handed out on
1681 * the first allocation from these dynamic ranges, so the removal
1682 * would trigger a new allocation and clear the wrong (already
1683 * empty) state, leaving the callbacks of the to be cleared state
1684 * dangling, which causes wreckage on the next hotplug operation.
1685 */
1686 if (name && (state == CPUHP_AP_ONLINE_DYN ||
1687 state == CPUHP_BP_PREPARE_DYN)) {
1688 ret = cpuhp_reserve_state(state);
1689 if (ret < 0)
1690 return ret;
1691 state = ret;
1692 }
1693 sp = cpuhp_get_step(state);
1694 if (name && sp->name)
1695 return -EBUSY;
1696
1697 sp->startup.single = startup;
1698 sp->teardown.single = teardown;
1699 sp->name = name;
1700 sp->multi_instance = multi_instance;
1701 INIT_HLIST_HEAD(&sp->list);
1702 return ret;
1703 }
1704
1705 static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
1706 {
1707 return cpuhp_get_step(state)->teardown.single;
1708 }
1709
1710 /*
1711 * Call the startup/teardown function for a step either on the AP or
1712 * on the current CPU.
1713 */
1714 static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
1715 struct hlist_node *node)
1716 {
1717 struct cpuhp_step *sp = cpuhp_get_step(state);
1718 int ret;
1719
1720 /*
1721 * If there's nothing to do, we're done.
1722 * Relies on the union for multi_instance.
1723 */
1724 if ((bringup && !sp->startup.single) ||
1725 (!bringup && !sp->teardown.single))
1726 return 0;
1727 /*
1728 * The non-AP-bound callbacks can fail on bringup. On teardown
1729 * (e.g. module removal) we crash for now.
1730 */
1731 #ifdef CONFIG_SMP
1732 if (cpuhp_is_ap_state(state))
1733 ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
1734 else
1735 ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1736 #else
1737 ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1738 #endif
1739 BUG_ON(ret && !bringup);
1740 return ret;
1741 }
1742
1743 /*
1744 * Called from __cpuhp_setup_state on a recoverable failure.
1745 *
1746 * Note: The teardown callbacks for rollback are not allowed to fail!
1747 */
1748 static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
1749 struct hlist_node *node)
1750 {
1751 int cpu;
1752
1753 /* Roll back the already executed steps on the other cpus */
1754 for_each_present_cpu(cpu) {
1755 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1756 int cpustate = st->state;
1757
1758 if (cpu >= failedcpu)
1759 break;
1760
1761 /* Did we invoke the startup call on that cpu ? */
1762 if (cpustate >= state)
1763 cpuhp_issue_call(cpu, state, false, node);
1764 }
1765 }
1766
1767 int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
1768 struct hlist_node *node,
1769 bool invoke)
1770 {
1771 struct cpuhp_step *sp;
1772 int cpu;
1773 int ret;
1774
1775 lockdep_assert_cpus_held();
1776
1777 sp = cpuhp_get_step(state);
1778 if (sp->multi_instance == false)
1779 return -EINVAL;
1780
1781 mutex_lock(&cpuhp_state_mutex);
1782
1783 if (!invoke || !sp->startup.multi)
1784 goto add_node;
1785
1786 /*
1787 * Try to call the startup callback for each present cpu
1788 * depending on the hotplug state of the cpu.
1789 */
1790 for_each_present_cpu(cpu) {
1791 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1792 int cpustate = st->state;
1793
1794 if (cpustate < state)
1795 continue;
1796
1797 ret = cpuhp_issue_call(cpu, state, true, node);
1798 if (ret) {
1799 if (sp->teardown.multi)
1800 cpuhp_rollback_install(cpu, state, node);
1801 goto unlock;
1802 }
1803 }
1804 add_node:
1805 ret = 0;
1806 hlist_add_head(node, &sp->list);
1807 unlock:
1808 mutex_unlock(&cpuhp_state_mutex);
1809 return ret;
1810 }
1811
1812 int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
1813 bool invoke)
1814 {
1815 int ret;
1816
1817 cpus_read_lock();
1818 ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
1819 cpus_read_unlock();
1820 return ret;
1821 }
1822 EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
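/*
 * Usage sketch (illustrative, not part of this file): the multi-instance
 * flavour is normally driven through the cpuhp_setup_state_multi() and
 * cpuhp_state_add_instance() wrappers from <linux/cpuhotplug.h>, which end
 * up in __cpuhp_setup_state() and __cpuhp_state_add_instance(). The multi
 * callbacks take (unsigned int cpu, struct hlist_node *node); the foo_*
 * names are hypothetical:
 *
 *	struct foo {
 *		struct hlist_node node;
 *		...
 *	};
 *
 *	static enum cpuhp_state foo_online_state;
 *
 *	int ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "foo:online",
 *					  foo_cpu_online, foo_cpu_offline);
 *	if (ret < 0)
 *		return ret;
 *	foo_online_state = ret;		[dynamically allocated state]
 *
 *	[then, per instance/device:]
 *	cpuhp_state_add_instance(foo_online_state, &foo->node);
 */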
1823
1824 /**
1825 * __cpuhp_setup_state_cpuslocked - Set up the callbacks for a hotplug machine state
1826 * @state: The state to setup
1827 * @invoke: If true, the startup function is invoked for cpus where
1828 * cpu state >= @state
1829 * @startup: startup callback function
1830 * @teardown: teardown callback function
1831 * @multi_instance: State is set up for multiple instances which get
1832 * added afterwards.
1833 *
1834 * The caller needs to hold cpus read locked while calling this function.
1835 * Returns:
1836 * On success:
1837 * Positive state number if @state is CPUHP_AP_ONLINE_DYN
1838 * 0 for all other states
1839 * On failure: proper (negative) error code
1840 */
1841 int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
1842 const char *name, bool invoke,
1843 int (*startup)(unsigned int cpu),
1844 int (*teardown)(unsigned int cpu),
1845 bool multi_instance)
1846 {
1847 int cpu, ret = 0;
1848 bool dynstate;
1849
1850 lockdep_assert_cpus_held();
1851
1852 if (cpuhp_cb_check(state) || !name)
1853 return -EINVAL;
1854
1855 mutex_lock(&cpuhp_state_mutex);
1856
1857 ret = cpuhp_store_callbacks(state, name, startup, teardown,
1858 multi_instance);
1859
1860 dynstate = state == CPUHP_AP_ONLINE_DYN;
1861 if (ret > 0 && dynstate) {
1862 state = ret;
1863 ret = 0;
1864 }
1865
1866 if (ret || !invoke || !startup)
1867 goto out;
1868
1869 /*
1870 * Try to call the startup callback for each present cpu
1871 * depending on the hotplug state of the cpu.
1872 */
1873 for_each_present_cpu(cpu) {
1874 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1875 int cpustate = st->state;
1876
1877 if (cpustate < state)
1878 continue;
1879
1880 ret = cpuhp_issue_call(cpu, state, true, NULL);
1881 if (ret) {
1882 if (teardown)
1883 cpuhp_rollback_install(cpu, state, NULL);
1884 cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
1885 goto out;
1886 }
1887 }
1888 out:
1889 mutex_unlock(&cpuhp_state_mutex);
1890 /*
1891 * If the requested state is CPUHP_AP_ONLINE_DYN, return the
1892 * dynamically allocated state in case of success.
1893 */
1894 if (!ret && dynstate)
1895 return state;
1896 return ret;
1897 }
1898 EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
1899
1900 int __cpuhp_setup_state(enum cpuhp_state state,
1901 const char *name, bool invoke,
1902 int (*startup)(unsigned int cpu),
1903 int (*teardown)(unsigned int cpu),
1904 bool multi_instance)
1905 {
1906 int ret;
1907
1908 cpus_read_lock();
1909 ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
1910 teardown, multi_instance);
1911 cpus_read_unlock();
1912 return ret;
1913 }
1914 EXPORT_SYMBOL(__cpuhp_setup_state);
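/*
 * Editor's note -- illustrative sketch, not part of the original file: the
 * common entry points are the cpuhp_setup_state() and
 * cpuhp_setup_state_nocalls() wrappers in <linux/cpuhotplug.h>, which wrap
 * the cpuslocked variant in cpus_read_lock()/cpus_read_unlock() exactly as
 * above. With CPUHP_AP_ONLINE_DYN the positive return value is the
 * dynamically allocated state and has to be remembered for later removal
 * (callback and variable names here are hypothetical):
 *
 *	static int my_cpu_online(unsigned int cpu)
 *	{
 *		return 0;
 *	}
 *
 *	static int my_cpu_offline(unsigned int cpu)
 *	{
 *		return 0;
 *	}
 *
 *	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "subsys/my:online",
 *				my_cpu_online, my_cpu_offline);
 *	if (ret < 0)
 *		return ret;
 *	my_hp_state = ret;
 */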
1915
1916 int __cpuhp_state_remove_instance(enum cpuhp_state state,
1917 struct hlist_node *node, bool invoke)
1918 {
1919 struct cpuhp_step *sp = cpuhp_get_step(state);
1920 int cpu;
1921
1922 BUG_ON(cpuhp_cb_check(state));
1923
1924 if (!sp->multi_instance)
1925 return -EINVAL;
1926
1927 cpus_read_lock();
1928 mutex_lock(&cpuhp_state_mutex);
1929
1930 if (!invoke || !cpuhp_get_teardown_cb(state))
1931 goto remove;
1932 /*
1933 * Call the teardown callback for each present cpu depending
1934 * on the hotplug state of the cpu. This function is not
1935 * allowed to fail currently!
1936 */
1937 for_each_present_cpu(cpu) {
1938 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1939 int cpustate = st->state;
1940
1941 if (cpustate >= state)
1942 cpuhp_issue_call(cpu, state, false, node);
1943 }
1944
1945 remove:
1946 hlist_del(node);
1947 mutex_unlock(&cpuhp_state_mutex);
1948 cpus_read_unlock();
1949
1950 return 0;
1951 }
1952 EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
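/*
 * Editor's note -- illustrative sketch, not part of the original file: the
 * usual entry point is the cpuhp_state_remove_instance() wrapper (or its
 * _nocalls variant) in <linux/cpuhotplug.h>, paired with the instance add
 * shown earlier. With the invoking variant the teardown callback runs on
 * every present CPU at or above the state before the node is unlinked:
 *
 *	cpuhp_state_remove_instance(my_state, &inst->node);
 */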
1953
1954 /**
1955  * __cpuhp_remove_state_cpuslocked - Remove the callbacks for a hotplug machine state
1956 * @state: The state to remove
1957 * @invoke: If true, the teardown function is invoked for cpus where
1958 * cpu state >= @state
1959 *
1960 * The caller needs to hold cpus read locked while calling this function.
1961 * The teardown callback is currently not allowed to fail. Think
1962 * about module removal!
1963 */
1964 void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
1965 {
1966 struct cpuhp_step *sp = cpuhp_get_step(state);
1967 int cpu;
1968
1969 BUG_ON(cpuhp_cb_check(state));
1970
1971 lockdep_assert_cpus_held();
1972
1973 mutex_lock(&cpuhp_state_mutex);
1974 if (sp->multi_instance) {
1975 WARN(!hlist_empty(&sp->list),
1976 "Error: Removing state %d which has instances left.\n",
1977 state);
1978 goto remove;
1979 }
1980
1981 if (!invoke || !cpuhp_get_teardown_cb(state))
1982 goto remove;
1983
1984 /*
1985 * Call the teardown callback for each present cpu depending
1986 * on the hotplug state of the cpu. This function is not
1987 * allowed to fail currently!
1988 */
1989 for_each_present_cpu(cpu) {
1990 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1991 int cpustate = st->state;
1992
1993 if (cpustate >= state)
1994 cpuhp_issue_call(cpu, state, false, NULL);
1995 }
1996 remove:
1997 cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
1998 mutex_unlock(&cpuhp_state_mutex);
1999 }
2000 EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
2001
2002 void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
2003 {
2004 cpus_read_lock();
2005 __cpuhp_remove_state_cpuslocked(state, invoke);
2006 cpus_read_unlock();
2007 }
2008 EXPORT_SYMBOL(__cpuhp_remove_state);
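/*
 * Editor's note -- illustrative sketch, not part of the original file: a
 * module exit path typically undoes a dynamically allocated state with the
 * cpuhp_remove_state() wrapper from <linux/cpuhotplug.h>; multi-instance
 * users remove all their instances first and then call
 * cpuhp_remove_multi_state(). "my_hp_state" is the hypothetical value
 * saved at setup time:
 *
 *	static void __exit my_exit(void)
 *	{
 *		cpuhp_remove_state(my_hp_state);
 *	}
 */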
2009
2010 #ifdef CONFIG_HOTPLUG_SMT
2011 static void cpuhp_offline_cpu_device(unsigned int cpu)
2012 {
2013 struct device *dev = get_cpu_device(cpu);
2014
2015 dev->offline = true;
2016 /* Tell user space about the state change */
2017 kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
2018 }
2019
2020 static void cpuhp_online_cpu_device(unsigned int cpu)
2021 {
2022 struct device *dev = get_cpu_device(cpu);
2023
2024 dev->offline = false;
2025 /* Tell user space about the state change */
2026 kobject_uevent(&dev->kobj, KOBJ_ONLINE);
2027 }
2028
2029 int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
2030 {
2031 int cpu, ret = 0;
2032
2033 cpu_maps_update_begin();
2034 for_each_online_cpu(cpu) {
2035 if (topology_is_primary_thread(cpu))
2036 continue;
2037 ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
2038 if (ret)
2039 break;
2040 /*
2041 * As this needs to hold the cpu maps lock it's impossible
2042 * to call device_offline() because that ends up calling
2043 * cpu_down() which takes cpu maps lock. cpu maps lock
2044 		 * needs to be held as this might race against in-kernel
2045 * abusers of the hotplug machinery (thermal management).
2046 *
2047 * So nothing would update device:offline state. That would
2048 * leave the sysfs entry stale and prevent onlining after
2049 * smt control has been changed to 'off' again. This is
2050 * called under the sysfs hotplug lock, so it is properly
2051 * serialized against the regular offline usage.
2052 */
2053 cpuhp_offline_cpu_device(cpu);
2054 }
2055 if (!ret)
2056 cpu_smt_control = ctrlval;
2057 cpu_maps_update_done();
2058 return ret;
2059 }
2060
2061 int cpuhp_smt_enable(void)
2062 {
2063 int cpu, ret = 0;
2064
2065 cpu_maps_update_begin();
2066 cpu_smt_control = CPU_SMT_ENABLED;
2067 for_each_present_cpu(cpu) {
2068 /* Skip online CPUs and CPUs on offline nodes */
2069 if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
2070 continue;
2071 ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
2072 if (ret)
2073 break;
2074 /* See comment in cpuhp_smt_disable() */
2075 cpuhp_online_cpu_device(cpu);
2076 }
2077 cpu_maps_update_done();
2078 return ret;
2079 }
2080 #endif
2081
2082 #if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
2083 static ssize_t show_cpuhp_state(struct device *dev,
2084 struct device_attribute *attr, char *buf)
2085 {
2086 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2087
2088 return sprintf(buf, "%d\n", st->state);
2089 }
2090 static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);
2091
2092 static ssize_t write_cpuhp_target(struct device *dev,
2093 struct device_attribute *attr,
2094 const char *buf, size_t count)
2095 {
2096 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2097 struct cpuhp_step *sp;
2098 int target, ret;
2099
2100 ret = kstrtoint(buf, 10, &target);
2101 if (ret)
2102 return ret;
2103
2104 #ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
2105 if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
2106 return -EINVAL;
2107 #else
2108 if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
2109 return -EINVAL;
2110 #endif
2111
2112 ret = lock_device_hotplug_sysfs();
2113 if (ret)
2114 return ret;
2115
2116 mutex_lock(&cpuhp_state_mutex);
2117 sp = cpuhp_get_step(target);
2118 ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
2119 mutex_unlock(&cpuhp_state_mutex);
2120 if (ret)
2121 goto out;
2122
2123 if (st->state < target)
2124 ret = do_cpu_up(dev->id, target);
2125 else
2126 ret = do_cpu_down(dev->id, target);
2127 out:
2128 unlock_device_hotplug();
2129 return ret ? ret : count;
2130 }
2131
2132 static ssize_t show_cpuhp_target(struct device *dev,
2133 struct device_attribute *attr, char *buf)
2134 {
2135 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2136
2137 return sprintf(buf, "%d\n", st->target);
2138 }
2139 static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
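/*
 * Editor's note -- illustrative sketch, not part of the original file: the
 * "state" and "target" attributes above end up in the per-cpu "hotplug"
 * group registered by cpuhp_sysfs_init() further down, i.e. under
 * /sys/devices/system/cpu/cpuN/hotplug/. Writing a state number to
 * "target" walks the CPU up or down to that state; arbitrary intermediate
 * targets need CONFIG_CPU_HOTPLUG_STATE_CONTROL, otherwise only
 * CPUHP_OFFLINE (0) and CPUHP_ONLINE are accepted. The system-wide list of
 * state numbers is in the root "hotplug" group, e.g.:
 *
 *	# cat /sys/devices/system/cpu/hotplug/states
 *	# cat /sys/devices/system/cpu/cpu1/hotplug/state
 *	# echo 0 > /sys/devices/system/cpu/cpu1/hotplug/target
 */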
2140
2141
2142 static ssize_t write_cpuhp_fail(struct device *dev,
2143 struct device_attribute *attr,
2144 const char *buf, size_t count)
2145 {
2146 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2147 struct cpuhp_step *sp;
2148 int fail, ret;
2149
2150 ret = kstrtoint(buf, 10, &fail);
2151 if (ret)
2152 return ret;
2153
2154 if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
2155 return -EINVAL;
2156
2157 /*
2158 * Cannot fail STARTING/DYING callbacks.
2159 */
2160 if (cpuhp_is_atomic_state(fail))
2161 return -EINVAL;
2162
2163 /*
2164 * Cannot fail anything that doesn't have callbacks.
2165 */
2166 mutex_lock(&cpuhp_state_mutex);
2167 sp = cpuhp_get_step(fail);
2168 if (!sp->startup.single && !sp->teardown.single)
2169 ret = -EINVAL;
2170 mutex_unlock(&cpuhp_state_mutex);
2171 if (ret)
2172 return ret;
2173
2174 st->fail = fail;
2175
2176 return count;
2177 }
2178
2179 static ssize_t show_cpuhp_fail(struct device *dev,
2180 struct device_attribute *attr, char *buf)
2181 {
2182 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2183
2184 return sprintf(buf, "%d\n", st->fail);
2185 }
2186
2187 static DEVICE_ATTR(fail, 0644, show_cpuhp_fail, write_cpuhp_fail);
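/*
 * Editor's note -- illustrative sketch, not part of the original file:
 * "fail" is a test hook for the rollback paths. Writing a state number
 * that has callbacks and is not an atomic STARTING/DYING state arms
 * st->fail, so a subsequent up/down transition through that state is
 * forced to fail and the rollback machinery gets exercised, e.g.:
 *
 *	# cat /sys/devices/system/cpu/hotplug/states
 *	# echo N > /sys/devices/system/cpu/cpu1/hotplug/fail
 *	# echo 0 > /sys/devices/system/cpu/cpu1/hotplug/target
 *
 * where N is a state number picked from the "states" listing.
 */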
2188
2189 static struct attribute *cpuhp_cpu_attrs[] = {
2190 &dev_attr_state.attr,
2191 &dev_attr_target.attr,
2192 &dev_attr_fail.attr,
2193 NULL
2194 };
2195
2196 static const struct attribute_group cpuhp_cpu_attr_group = {
2197 .attrs = cpuhp_cpu_attrs,
2198 .name = "hotplug",
2199 NULL
2200 };
2201
2202 static ssize_t show_cpuhp_states(struct device *dev,
2203 struct device_attribute *attr, char *buf)
2204 {
2205 ssize_t cur, res = 0;
2206 int i;
2207
2208 mutex_lock(&cpuhp_state_mutex);
2209 for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
2210 struct cpuhp_step *sp = cpuhp_get_step(i);
2211
2212 if (sp->name) {
2213 cur = sprintf(buf, "%3d: %s\n", i, sp->name);
2214 buf += cur;
2215 res += cur;
2216 }
2217 }
2218 mutex_unlock(&cpuhp_state_mutex);
2219 return res;
2220 }
2221 static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);
2222
2223 static struct attribute *cpuhp_cpu_root_attrs[] = {
2224 &dev_attr_states.attr,
2225 NULL
2226 };
2227
2228 static const struct attribute_group cpuhp_cpu_root_attr_group = {
2229 .attrs = cpuhp_cpu_root_attrs,
2230 .name = "hotplug",
2231 NULL
2232 };
2233
2234 #ifdef CONFIG_HOTPLUG_SMT
2235
2236 static ssize_t
2237 __store_smt_control(struct device *dev, struct device_attribute *attr,
2238 const char *buf, size_t count)
2239 {
2240 int ctrlval, ret;
2241
2242 if (sysfs_streq(buf, "on"))
2243 ctrlval = CPU_SMT_ENABLED;
2244 else if (sysfs_streq(buf, "off"))
2245 ctrlval = CPU_SMT_DISABLED;
2246 else if (sysfs_streq(buf, "forceoff"))
2247 ctrlval = CPU_SMT_FORCE_DISABLED;
2248 else
2249 return -EINVAL;
2250
2251 if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
2252 return -EPERM;
2253
2254 if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
2255 return -ENODEV;
2256
2257 ret = lock_device_hotplug_sysfs();
2258 if (ret)
2259 return ret;
2260
2261 if (ctrlval != cpu_smt_control) {
2262 switch (ctrlval) {
2263 case CPU_SMT_ENABLED:
2264 ret = cpuhp_smt_enable();
2265 break;
2266 case CPU_SMT_DISABLED:
2267 case CPU_SMT_FORCE_DISABLED:
2268 ret = cpuhp_smt_disable(ctrlval);
2269 break;
2270 }
2271 }
2272
2273 unlock_device_hotplug();
2274 return ret ? ret : count;
2275 }
2276
2277 #else /* !CONFIG_HOTPLUG_SMT */
2278 static ssize_t
2279 __store_smt_control(struct device *dev, struct device_attribute *attr,
2280 const char *buf, size_t count)
2281 {
2282 return -ENODEV;
2283 }
2284 #endif /* CONFIG_HOTPLUG_SMT */
2285
2286 static const char *smt_states[] = {
2287 [CPU_SMT_ENABLED] = "on",
2288 [CPU_SMT_DISABLED] = "off",
2289 [CPU_SMT_FORCE_DISABLED] = "forceoff",
2290 [CPU_SMT_NOT_SUPPORTED] = "notsupported",
2291 [CPU_SMT_NOT_IMPLEMENTED] = "notimplemented",
2292 };
2293
2294 static ssize_t
2295 show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
2296 {
2297 const char *state = smt_states[cpu_smt_control];
2298
2299 return snprintf(buf, PAGE_SIZE - 2, "%s\n", state);
2300 }
2301
2302 static ssize_t
2303 store_smt_control(struct device *dev, struct device_attribute *attr,
2304 const char *buf, size_t count)
2305 {
2306 return __store_smt_control(dev, attr, buf, count);
2307 }
2308 static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
2309
2310 static ssize_t
2311 show_smt_active(struct device *dev, struct device_attribute *attr, char *buf)
2312 {
2313 return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active());
2314 }
2315 static DEVICE_ATTR(active, 0444, show_smt_active, NULL);
2316
2317 static struct attribute *cpuhp_smt_attrs[] = {
2318 &dev_attr_control.attr,
2319 &dev_attr_active.attr,
2320 NULL
2321 };
2322
2323 static const struct attribute_group cpuhp_smt_attr_group = {
2324 .attrs = cpuhp_smt_attrs,
2325 .name = "smt",
2326 NULL
2327 };
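/*
 * Editor's note -- illustrative sketch, not part of the original file: the
 * group above is registered on the cpu subsystem root by
 * cpu_smt_sysfs_init() below, so the files show up as
 * /sys/devices/system/cpu/smt/control and /sys/devices/system/cpu/smt/active.
 * "control" takes the strings parsed in __store_smt_control(), "active"
 * reflects sched_smt_active():
 *
 *	# cat /sys/devices/system/cpu/smt/control
 *	# echo off > /sys/devices/system/cpu/smt/control
 *	# cat /sys/devices/system/cpu/smt/active
 */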
2328
2329 static int __init cpu_smt_sysfs_init(void)
2330 {
2331 return sysfs_create_group(&cpu_subsys.dev_root->kobj,
2332 &cpuhp_smt_attr_group);
2333 }
2334
2335 static int __init cpuhp_sysfs_init(void)
2336 {
2337 int cpu, ret;
2338
2339 ret = cpu_smt_sysfs_init();
2340 if (ret)
2341 return ret;
2342
2343 ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
2344 &cpuhp_cpu_root_attr_group);
2345 if (ret)
2346 return ret;
2347
2348 for_each_possible_cpu(cpu) {
2349 struct device *dev = get_cpu_device(cpu);
2350
2351 if (!dev)
2352 continue;
2353 ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
2354 if (ret)
2355 return ret;
2356 }
2357 return 0;
2358 }
2359 device_initcall(cpuhp_sysfs_init);
2360 #endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */
2361
2362 /*
2363 * cpu_bit_bitmap[] is a special, "compressed" data structure that
2364  * represents, for each CPU number nr, the NR_CPUS-bit value 1<<nr.
2365 *
2366 * It is used by cpumask_of() to get a constant address to a CPU
2367 * mask value that has a single bit set only.
2368 */
2369
2370 /* cpu_bit_bitmap[0] is empty - so we can back into it */
2371 #define MASK_DECLARE_1(x) [x+1][0] = (1UL << (x))
2372 #define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
2373 #define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
2374 #define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
2375
2376 const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
2377
2378 MASK_DECLARE_8(0), MASK_DECLARE_8(8),
2379 MASK_DECLARE_8(16), MASK_DECLARE_8(24),
2380 #if BITS_PER_LONG > 32
2381 MASK_DECLARE_8(32), MASK_DECLARE_8(40),
2382 MASK_DECLARE_8(48), MASK_DECLARE_8(56),
2383 #endif
2384 };
2385 EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
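/*
 * Editor's note -- illustrative sketch, not part of the original file: the
 * table above is what cpumask_of() in <linux/cpumask.h> points into, so a
 * read-only mask with exactly one bit set can be handed out without any
 * allocation:
 *
 *	const struct cpumask *mask = cpumask_of(cpu);
 *
 *	WARN_ON(!cpumask_test_cpu(cpu, mask));
 *	WARN_ON(cpumask_weight(mask) != 1);
 *
 * Typical consumers are interfaces that want a constant single-CPU mask,
 * e.g. irq_set_affinity_hint(irq, cpumask_of(cpu)).
 */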
2386
2387 const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
2388 EXPORT_SYMBOL(cpu_all_bits);
2389
2390 #ifdef CONFIG_INIT_ALL_POSSIBLE
2391 struct cpumask __cpu_possible_mask __read_mostly
2392 = {CPU_BITS_ALL};
2393 #else
2394 struct cpumask __cpu_possible_mask __read_mostly;
2395 #endif
2396 EXPORT_SYMBOL(__cpu_possible_mask);
2397
2398 struct cpumask __cpu_online_mask __read_mostly;
2399 EXPORT_SYMBOL(__cpu_online_mask);
2400
2401 struct cpumask __cpu_present_mask __read_mostly;
2402 EXPORT_SYMBOL(__cpu_present_mask);
2403
2404 struct cpumask __cpu_active_mask __read_mostly;
2405 EXPORT_SYMBOL(__cpu_active_mask);
2406
2407 atomic_t __num_online_cpus __read_mostly;
2408 EXPORT_SYMBOL(__num_online_cpus);
2409
2410 void init_cpu_present(const struct cpumask *src)
2411 {
2412 cpumask_copy(&__cpu_present_mask, src);
2413 }
2414
2415 void init_cpu_possible(const struct cpumask *src)
2416 {
2417 cpumask_copy(&__cpu_possible_mask, src);
2418 }
2419
2420 void init_cpu_online(const struct cpumask *src)
2421 {
2422 cpumask_copy(&__cpu_online_mask, src);
2423 }
2424
2425 void set_cpu_online(unsigned int cpu, bool online)
2426 {
2427 /*
2428 * atomic_inc/dec() is required to handle the horrid abuse of this
2429 * function by the reboot and kexec code which invoke it from
2430 * IPI/NMI broadcasts when shutting down CPUs. Invocation from
2431 * regular CPU hotplug is properly serialized.
2432 *
2433 	 * Note that the fact that __num_online_cpus is of type atomic_t
2434 * does not protect readers which are not serialized against
2435 * concurrent hotplug operations.
2436 */
2437 if (online) {
2438 if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask))
2439 atomic_inc(&__num_online_cpus);
2440 } else {
2441 if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask))
2442 atomic_dec(&__num_online_cpus);
2443 }
2444 }
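/*
 * Editor's note -- illustrative sketch, not part of the original file: as
 * the comment above notes, the atomic counter only keeps itself
 * consistent. A reader that needs a stable view of the online CPUs still
 * has to hold the hotplug read lock around its use of the mask and count:
 *
 *	cpus_read_lock();
 *	nr = num_online_cpus();
 *	for_each_online_cpu(cpu)
 *		do_per_cpu_work(cpu);
 *	cpus_read_unlock();
 *
 * (do_per_cpu_work() and nr are hypothetical.)
 */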
2445
2446 /*
2447 * Activate the first processor.
2448 */
2449 void __init boot_cpu_init(void)
2450 {
2451 int cpu = smp_processor_id();
2452
2453 	/* Mark the boot cpu "present", "online" etc. for the SMP and UP cases */
2454 set_cpu_online(cpu, true);
2455 set_cpu_active(cpu, true);
2456 set_cpu_present(cpu, true);
2457 set_cpu_possible(cpu, true);
2458
2459 #ifdef CONFIG_SMP
2460 __boot_cpu_id = cpu;
2461 #endif
2462 }
2463
2464 /*
2465 * Must be called _AFTER_ setting up the per_cpu areas
2466 */
2467 void __init boot_cpu_hotplug_init(void)
2468 {
2469 #ifdef CONFIG_SMP
2470 cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
2471 #endif
2472 this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
2473 }
2474
2475 /*
2476 * These are used for a global "mitigations=" cmdline option for toggling
2477 * optional CPU mitigations.
2478 */
2479 enum cpu_mitigations {
2480 CPU_MITIGATIONS_OFF,
2481 CPU_MITIGATIONS_AUTO,
2482 CPU_MITIGATIONS_AUTO_NOSMT,
2483 };
2484
2485 static enum cpu_mitigations cpu_mitigations __ro_after_init =
2486 CPU_MITIGATIONS_AUTO;
2487
2488 static int __init mitigations_parse_cmdline(char *arg)
2489 {
2490 if (!strcmp(arg, "off"))
2491 cpu_mitigations = CPU_MITIGATIONS_OFF;
2492 else if (!strcmp(arg, "auto"))
2493 cpu_mitigations = CPU_MITIGATIONS_AUTO;
2494 else if (!strcmp(arg, "auto,nosmt"))
2495 cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
2496 else
2497 pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n",
2498 arg);
2499
2500 return 0;
2501 }
2502 early_param("mitigations", mitigations_parse_cmdline);
2503
2504 /* mitigations=off */
2505 bool cpu_mitigations_off(void)
2506 {
2507 return cpu_mitigations == CPU_MITIGATIONS_OFF;
2508 }
2509 EXPORT_SYMBOL_GPL(cpu_mitigations_off);
2510
2511 /* mitigations=auto,nosmt */
2512 bool cpu_mitigations_auto_nosmt(void)
2513 {
2514 return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
2515 }
2516 EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
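/*
 * Editor's note -- illustrative sketch, not part of the original file:
 * architecture mitigation selection code typically consults these helpers
 * while parsing its own options, roughly along these lines (the mitigation
 * and both my_* functions are hypothetical):
 *
 *	static void __init my_mitigation_select(void)
 *	{
 *		if (cpu_mitigations_off())
 *			return;
 *		my_mitigation_enable();
 *		if (cpu_mitigations_auto_nosmt())
 *			my_mitigation_force_smt_off();
 *	}
 */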
2517