1 /* CPU control.
2 * (C) 2001, 2002, 2003, 2004 Rusty Russell
3 *
4  * This code is licensed under the GPL.
5 */
6 #include <linux/sched/mm.h>
7 #include <linux/proc_fs.h>
8 #include <linux/smp.h>
9 #include <linux/init.h>
10 #include <linux/notifier.h>
11 #include <linux/sched/signal.h>
12 #include <linux/sched/hotplug.h>
13 #include <linux/sched/isolation.h>
14 #include <linux/sched/task.h>
15 #include <linux/sched/smt.h>
16 #include <linux/unistd.h>
17 #include <linux/cpu.h>
18 #include <linux/oom.h>
19 #include <linux/rcupdate.h>
20 #include <linux/export.h>
21 #include <linux/bug.h>
22 #include <linux/kthread.h>
23 #include <linux/stop_machine.h>
24 #include <linux/mutex.h>
25 #include <linux/gfp.h>
26 #include <linux/suspend.h>
27 #include <linux/lockdep.h>
28 #include <linux/tick.h>
29 #include <linux/irq.h>
30 #include <linux/nmi.h>
31 #include <linux/smpboot.h>
32 #include <linux/relay.h>
33 #include <linux/slab.h>
34 #include <linux/scs.h>
35 #include <linux/percpu-rwsem.h>
36 #include <linux/cpuset.h>
37 #include <linux/random.h>
38 #include <linux/cc_platform.h>
39
40 #include <trace/events/power.h>
41 #define CREATE_TRACE_POINTS
42 #include <trace/events/cpuhp.h>
43
44 #include "smpboot.h"
45
46 /**
47 * struct cpuhp_cpu_state - Per cpu hotplug state storage
48 * @state: The current cpu state
49 * @target: The target state
50 * @fail: Current CPU hotplug callback state
51 * @thread: Pointer to the hotplug thread
52 * @should_run: Thread should execute
53 * @rollback: Perform a rollback
54 * @single: Single callback invocation
55 * @bringup: Single callback bringup or teardown selector
56 * @cpu: CPU number
57 * @node: Remote CPU node; for multi-instance, do a
58 * single entry callback for install/remove
59 * @last: For multi-instance rollback, remember how far we got
60 * @cb_state: The state for a single callback (install/uninstall)
61 * @result: Result of the operation
62 * @done_up: Signal completion to the issuer of the task for cpu-up
63 * @done_down: Signal completion to the issuer of the task for cpu-down
64 */
65 struct cpuhp_cpu_state {
66 enum cpuhp_state state;
67 enum cpuhp_state target;
68 enum cpuhp_state fail;
69 #ifdef CONFIG_SMP
70 struct task_struct *thread;
71 bool should_run;
72 bool rollback;
73 bool single;
74 bool bringup;
75 struct hlist_node *node;
76 struct hlist_node *last;
77 enum cpuhp_state cb_state;
78 int result;
79 struct completion done_up;
80 struct completion done_down;
81 #endif
82 };
83
84 static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
85 .fail = CPUHP_INVALID,
86 };
87
88 #ifdef CONFIG_SMP
89 cpumask_t cpus_booted_once_mask;
90 #endif
91
92 #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
93 static struct lockdep_map cpuhp_state_up_map =
94 STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
95 static struct lockdep_map cpuhp_state_down_map =
96 STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
97
98
99 static inline void cpuhp_lock_acquire(bool bringup)
100 {
101 lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
102 }
103
104 static inline void cpuhp_lock_release(bool bringup)
105 {
106 lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
107 }
108 #else
109
110 static inline void cpuhp_lock_acquire(bool bringup) { }
111 static inline void cpuhp_lock_release(bool bringup) { }
112
113 #endif
114
115 /**
116 * struct cpuhp_step - Hotplug state machine step
117 * @name: Name of the step
118 * @startup: Startup function of the step
119 * @teardown: Teardown function of the step
120 * @cant_stop: Bringup/teardown can't be stopped at this step
121 * @multi_instance: State has multiple instances which get added afterwards
122 */
123 struct cpuhp_step {
124 const char *name;
125 union {
126 int (*single)(unsigned int cpu);
127 int (*multi)(unsigned int cpu,
128 struct hlist_node *node);
129 } startup;
130 union {
131 int (*single)(unsigned int cpu);
132 int (*multi)(unsigned int cpu,
133 struct hlist_node *node);
134 } teardown;
135 /* private: */
136 struct hlist_head list;
137 /* public: */
138 bool cant_stop;
139 bool multi_instance;
140 };
141
142 static DEFINE_MUTEX(cpuhp_state_mutex);
143 static struct cpuhp_step cpuhp_hp_states[];
144
145 static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
146 {
147 return cpuhp_hp_states + state;
148 }
149
150 static bool cpuhp_step_empty(bool bringup, struct cpuhp_step *step)
151 {
152 return bringup ? !step->startup.single : !step->teardown.single;
153 }
154
155 /**
156 * cpuhp_invoke_callback - Invoke the callbacks for a given state
157 * @cpu: The cpu for which the callback should be invoked
158 * @state: The state to do callbacks for
159 * @bringup: True if the bringup callback should be invoked
160 * @node: For multi-instance, do a single entry callback for install/remove
161 * @lastp: For multi-instance rollback, remember how far we got
162 *
163 * Called from cpu hotplug and from the state register machinery.
164 *
165 * Return: %0 on success or a negative errno code
166 */
167 static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
168 bool bringup, struct hlist_node *node,
169 struct hlist_node **lastp)
170 {
171 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
172 struct cpuhp_step *step = cpuhp_get_step(state);
173 int (*cbm)(unsigned int cpu, struct hlist_node *node);
174 int (*cb)(unsigned int cpu);
175 int ret, cnt;
176
177 if (st->fail == state) {
178 st->fail = CPUHP_INVALID;
179 return -EAGAIN;
180 }
181
182 if (cpuhp_step_empty(bringup, step)) {
183 WARN_ON_ONCE(1);
184 return 0;
185 }
186
187 if (!step->multi_instance) {
188 WARN_ON_ONCE(lastp && *lastp);
189 cb = bringup ? step->startup.single : step->teardown.single;
190
191 trace_cpuhp_enter(cpu, st->target, state, cb);
192 ret = cb(cpu);
193 trace_cpuhp_exit(cpu, st->state, state, ret);
194 return ret;
195 }
196 cbm = bringup ? step->startup.multi : step->teardown.multi;
197
198 /* Single invocation for instance add/remove */
199 if (node) {
200 WARN_ON_ONCE(lastp && *lastp);
201 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
202 ret = cbm(cpu, node);
203 trace_cpuhp_exit(cpu, st->state, state, ret);
204 return ret;
205 }
206
207 /* State transition. Invoke on all instances */
208 cnt = 0;
209 hlist_for_each(node, &step->list) {
210 if (lastp && node == *lastp)
211 break;
212
213 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
214 ret = cbm(cpu, node);
215 trace_cpuhp_exit(cpu, st->state, state, ret);
216 if (ret) {
217 if (!lastp)
218 goto err;
219
220 *lastp = node;
221 return ret;
222 }
223 cnt++;
224 }
225 if (lastp)
226 *lastp = NULL;
227 return 0;
228 err:
229 /* Rollback the instances if one failed */
230 cbm = !bringup ? step->startup.multi : step->teardown.multi;
231 if (!cbm)
232 return ret;
233
234 hlist_for_each(node, &step->list) {
235 if (!cnt--)
236 break;
237
238 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
239 ret = cbm(cpu, node);
240 trace_cpuhp_exit(cpu, st->state, state, ret);
241 /*
242 	 * Rollback must not fail.
243 */
244 WARN_ON_ONCE(ret);
245 }
246 return ret;
247 }
248
249 #ifdef CONFIG_SMP
250 static bool cpuhp_is_ap_state(enum cpuhp_state state)
251 {
252 /*
253 * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
254 * purposes as that state is handled explicitly in cpu_down.
255 */
256 return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
257 }
258
259 static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
260 {
261 struct completion *done = bringup ? &st->done_up : &st->done_down;
262 wait_for_completion(done);
263 }
264
265 static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
266 {
267 struct completion *done = bringup ? &st->done_up : &st->done_down;
268 complete(done);
269 }
270
271 /*
272  * The former STARTING/DYING states run with IRQs disabled and must not fail.
273 */
274 static bool cpuhp_is_atomic_state(enum cpuhp_state state)
275 {
276 return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
277 }
278
279 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
280 static DEFINE_MUTEX(cpu_add_remove_lock);
281 bool cpuhp_tasks_frozen;
282 EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
283
284 /*
285 * The following two APIs (cpu_maps_update_begin/done) must be used when
286 * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
287 */
288 void cpu_maps_update_begin(void)
289 {
290 mutex_lock(&cpu_add_remove_lock);
291 }
292
293 void cpu_maps_update_done(void)
294 {
295 mutex_unlock(&cpu_add_remove_lock);
296 }
297
298 /*
299 * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
300 * Should always be manipulated under cpu_add_remove_lock
301 */
302 static int cpu_hotplug_disabled;
303
304 #ifdef CONFIG_HOTPLUG_CPU
305
306 DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
307
308 void cpus_read_lock(void)
309 {
310 percpu_down_read(&cpu_hotplug_lock);
311 }
312 EXPORT_SYMBOL_GPL(cpus_read_lock);
313
314 int cpus_read_trylock(void)
315 {
316 return percpu_down_read_trylock(&cpu_hotplug_lock);
317 }
318 EXPORT_SYMBOL_GPL(cpus_read_trylock);
319
320 void cpus_read_unlock(void)
321 {
322 percpu_up_read(&cpu_hotplug_lock);
323 }
324 EXPORT_SYMBOL_GPL(cpus_read_unlock);
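/*
 * Illustrative usage sketch (not part of this file): a typical read-side
 * hotplug critical section. per_cpu_setup() is a hypothetical helper.
 *
 *	unsigned int cpu;
 *
 *	cpus_read_lock();
 *	for_each_online_cpu(cpu)
 *		per_cpu_setup(cpu);	// CPUs cannot come or go here
 *	cpus_read_unlock();
 */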
325
326 void cpus_write_lock(void)
327 {
328 percpu_down_write(&cpu_hotplug_lock);
329 }
330
331 void cpus_write_unlock(void)
332 {
333 percpu_up_write(&cpu_hotplug_lock);
334 }
335
336 void lockdep_assert_cpus_held(void)
337 {
338 /*
339 * We can't have hotplug operations before userspace starts running,
340 * and some init codepaths will knowingly not take the hotplug lock.
341 * This is all valid, so mute lockdep until it makes sense to report
342 * unheld locks.
343 */
344 if (system_state < SYSTEM_RUNNING)
345 return;
346
347 percpu_rwsem_assert_held(&cpu_hotplug_lock);
348 }
349
350 #ifdef CONFIG_LOCKDEP
351 int lockdep_is_cpus_held(void)
352 {
353 return percpu_rwsem_is_held(&cpu_hotplug_lock);
354 }
355 #endif
356
357 static void lockdep_acquire_cpus_lock(void)
358 {
359 rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
360 }
361
362 static void lockdep_release_cpus_lock(void)
363 {
364 rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
365 }
366
367 /*
368 * Wait for currently running CPU hotplug operations to complete (if any) and
369 * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
370 * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
371 * hotplug path before performing hotplug operations. So acquiring that lock
372 * guarantees mutual exclusion from any currently running hotplug operations.
373 */
374 void cpu_hotplug_disable(void)
375 {
376 cpu_maps_update_begin();
377 cpu_hotplug_disabled++;
378 cpu_maps_update_done();
379 }
380 EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
381
382 static void __cpu_hotplug_enable(void)
383 {
384 if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
385 return;
386 cpu_hotplug_disabled--;
387 }
388
389 void cpu_hotplug_enable(void)
390 {
391 cpu_maps_update_begin();
392 __cpu_hotplug_enable();
393 cpu_maps_update_done();
394 }
395 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
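/*
 * Hedged example (not taken from a real caller): temporarily blocking
 * hotplug around a fragile sequence. do_fragile_update() is a made-up name.
 *
 *	cpu_hotplug_disable();
 *	do_fragile_update();	// cpu_up()/cpu_down() now return -EBUSY
 *	cpu_hotplug_enable();
 */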
396
397 #else
398
399 static void lockdep_acquire_cpus_lock(void)
400 {
401 }
402
403 static void lockdep_release_cpus_lock(void)
404 {
405 }
406
407 #endif /* CONFIG_HOTPLUG_CPU */
408
409 /*
410 * Architectures that need SMT-specific errata handling during SMT hotplug
411 * should override this.
412 */
413 void __weak arch_smt_update(void) { }
414
415 #ifdef CONFIG_HOTPLUG_SMT
416 enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
417
418 void __init cpu_smt_disable(bool force)
419 {
420 if (!cpu_smt_possible())
421 return;
422
423 if (force) {
424 pr_info("SMT: Force disabled\n");
425 cpu_smt_control = CPU_SMT_FORCE_DISABLED;
426 } else {
427 pr_info("SMT: disabled\n");
428 cpu_smt_control = CPU_SMT_DISABLED;
429 }
430 }
431
432 /*
433 * The decision whether SMT is supported can only be done after the full
434 * CPU identification. Called from architecture code.
435 */
436 void __init cpu_smt_check_topology(void)
437 {
438 if (!topology_smt_supported())
439 cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
440 }
441
442 static int __init smt_cmdline_disable(char *str)
443 {
444 cpu_smt_disable(str && !strcmp(str, "force"));
445 return 0;
446 }
447 early_param("nosmt", smt_cmdline_disable);
448
449 /*
450  * For architectures supporting partial SMT states, check if the thread is allowed.
451 * Otherwise this has already been checked through cpu_smt_max_threads when
452 * setting the SMT level.
453 */
454 static inline bool cpu_smt_thread_allowed(unsigned int cpu)
455 {
456 #ifdef CONFIG_SMT_NUM_THREADS_DYNAMIC
457 return topology_smt_thread_allowed(cpu);
458 #else
459 return true;
460 #endif
461 }
462
463 static inline bool cpu_bootable(unsigned int cpu)
464 {
465 if (cpu_smt_control == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu))
466 return true;
467
468 /* All CPUs are bootable if controls are not configured */
469 if (cpu_smt_control == CPU_SMT_NOT_IMPLEMENTED)
470 return true;
471
472 /* All CPUs are bootable if CPU is not SMT capable */
473 if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
474 return true;
475
476 if (topology_is_primary_thread(cpu))
477 return true;
478
479 /*
480 * On x86 it's required to boot all logical CPUs at least once so
481 * that the init code can get a chance to set CR4.MCE on each
482 * CPU. Otherwise, a broadcasted MCE observing CR4.MCE=0b on any
483 	 * core will shut down the machine.
484 */
485 return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
486 }
487
488 /* Returns true if SMT is supported and not forcefully (irreversibly) disabled */
489 bool cpu_smt_possible(void)
490 {
491 return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
492 cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
493 }
494 EXPORT_SYMBOL_GPL(cpu_smt_possible);
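/*
 * Illustrative sketch: a caller that only advertises an SMT-dependent
 * feature when SMT could ever be enabled. report_smt_feature() is a
 * hypothetical function.
 *
 *	if (cpu_smt_possible())
 *		report_smt_feature();
 */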
495 #else
496 static inline bool cpu_bootable(unsigned int cpu) { return true; }
497 #endif
498
499 static inline enum cpuhp_state
500 cpuhp_set_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target)
501 {
502 enum cpuhp_state prev_state = st->state;
503 bool bringup = st->state < target;
504
505 st->rollback = false;
506 st->last = NULL;
507
508 st->target = target;
509 st->single = false;
510 st->bringup = bringup;
511 if (cpu_dying(cpu) != !bringup)
512 set_cpu_dying(cpu, !bringup);
513
514 return prev_state;
515 }
516
517 static inline void
518 cpuhp_reset_state(int cpu, struct cpuhp_cpu_state *st,
519 enum cpuhp_state prev_state)
520 {
521 bool bringup = !st->bringup;
522
523 st->target = prev_state;
524
525 /*
526 	 * Already rolling back. No need to invert the bringup value or to change
527 * the current state.
528 */
529 if (st->rollback)
530 return;
531
532 st->rollback = true;
533
534 /*
535 * If we have st->last we need to undo partial multi_instance of this
536 * state first. Otherwise start undo at the previous state.
537 */
538 if (!st->last) {
539 if (st->bringup)
540 st->state--;
541 else
542 st->state++;
543 }
544
545 st->bringup = bringup;
546 if (cpu_dying(cpu) != !bringup)
547 set_cpu_dying(cpu, !bringup);
548 }
549
550 /* Regular hotplug invocation of the AP hotplug thread */
551 static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
552 {
553 if (!st->single && st->state == st->target)
554 return;
555
556 st->result = 0;
557 /*
558 * Make sure the above stores are visible before should_run becomes
559 * true. Paired with the mb() above in cpuhp_thread_fun()
560 */
561 smp_mb();
562 st->should_run = true;
563 wake_up_process(st->thread);
564 wait_for_ap_thread(st, st->bringup);
565 }
566
567 static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st,
568 enum cpuhp_state target)
569 {
570 enum cpuhp_state prev_state;
571 int ret;
572
573 prev_state = cpuhp_set_state(cpu, st, target);
574 __cpuhp_kick_ap(st);
575 if ((ret = st->result)) {
576 cpuhp_reset_state(cpu, st, prev_state);
577 __cpuhp_kick_ap(st);
578 }
579
580 return ret;
581 }
582
583 static int bringup_wait_for_ap(unsigned int cpu)
584 {
585 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
586
587 /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
588 wait_for_ap_thread(st, true);
589 if (WARN_ON_ONCE((!cpu_online(cpu))))
590 return -ECANCELED;
591
592 /* Unpark the hotplug thread of the target cpu */
593 kthread_unpark(st->thread);
594
595 /*
596 * SMT soft disabling on X86 requires to bring the CPU out of the
597 * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The
598 * CPU marked itself as booted_once in notify_cpu_starting() so the
599 * cpu_bootable() check will now return false if this is not the
600 * primary sibling.
601 */
602 if (!cpu_bootable(cpu))
603 return -ECANCELED;
604
605 if (st->target <= CPUHP_AP_ONLINE_IDLE)
606 return 0;
607
608 return cpuhp_kick_ap(cpu, st, st->target);
609 }
610
611 static int bringup_cpu(unsigned int cpu)
612 {
613 struct task_struct *idle = idle_thread_get(cpu);
614 int ret;
615
616 /*
617 * Reset stale stack state from the last time this CPU was online.
618 */
619 scs_task_reset(idle);
620 kasan_unpoison_task_stack(idle);
621
622 /*
623 * Some architectures have to walk the irq descriptors to
624 * setup the vector space for the cpu which comes online.
625 * Prevent irq alloc/free across the bringup.
626 */
627 irq_lock_sparse();
628
629 /* Arch-specific enabling code. */
630 ret = __cpu_up(cpu, idle);
631 irq_unlock_sparse();
632 if (ret)
633 return ret;
634 return bringup_wait_for_ap(cpu);
635 }
636
637 static int finish_cpu(unsigned int cpu)
638 {
639 struct task_struct *idle = idle_thread_get(cpu);
640 struct mm_struct *mm = idle->active_mm;
641
642 /*
643 * idle_task_exit() will have switched to &init_mm, now
644 * clean up any remaining active_mm state.
645 */
646 if (mm != &init_mm)
647 idle->active_mm = &init_mm;
648 mmdrop(mm);
649 return 0;
650 }
651
652 /*
653 * Hotplug state machine related functions
654 */
655
656 /*
657 * Get the next state to run. Empty ones will be skipped. Returns true if a
658 * state must be run.
659 *
660 * st->state will be modified ahead of time, to match state_to_run, as if it
661  * has already run.
662 */
663 static bool cpuhp_next_state(bool bringup,
664 enum cpuhp_state *state_to_run,
665 struct cpuhp_cpu_state *st,
666 enum cpuhp_state target)
667 {
668 do {
669 if (bringup) {
670 if (st->state >= target)
671 return false;
672
673 *state_to_run = ++st->state;
674 } else {
675 if (st->state <= target)
676 return false;
677
678 *state_to_run = st->state--;
679 }
680
681 if (!cpuhp_step_empty(bringup, cpuhp_get_step(*state_to_run)))
682 break;
683 } while (true);
684
685 return true;
686 }
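/*
 * Worked example of the loop above (illustrative state numbers, not real
 * cpuhp_state values): bringing up from st->state == 10 to target == 13
 * with state 12 empty runs the callbacks for 11 and 13 only and leaves
 * st->state == 13 when cpuhp_next_state() finally returns false.
 */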
687
688 static int __cpuhp_invoke_callback_range(bool bringup,
689 unsigned int cpu,
690 struct cpuhp_cpu_state *st,
691 enum cpuhp_state target,
692 bool nofail)
693 {
694 enum cpuhp_state state;
695 int ret = 0;
696
697 while (cpuhp_next_state(bringup, &state, st, target)) {
698 int err;
699
700 err = cpuhp_invoke_callback(cpu, state, bringup, NULL, NULL);
701 if (!err)
702 continue;
703
704 if (nofail) {
705 pr_warn("CPU %u %s state %s (%d) failed (%d)\n",
706 cpu, bringup ? "UP" : "DOWN",
707 cpuhp_get_step(st->state)->name,
708 st->state, err);
709 ret = -1;
710 } else {
711 ret = err;
712 break;
713 }
714 }
715
716 return ret;
717 }
718
719 static inline int cpuhp_invoke_callback_range(bool bringup,
720 unsigned int cpu,
721 struct cpuhp_cpu_state *st,
722 enum cpuhp_state target)
723 {
724 return __cpuhp_invoke_callback_range(bringup, cpu, st, target, false);
725 }
726
727 static inline void cpuhp_invoke_callback_range_nofail(bool bringup,
728 unsigned int cpu,
729 struct cpuhp_cpu_state *st,
730 enum cpuhp_state target)
731 {
732 __cpuhp_invoke_callback_range(bringup, cpu, st, target, true);
733 }
734
735 static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
736 {
737 if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
738 return true;
739 /*
740 * When CPU hotplug is disabled, then taking the CPU down is not
741 * possible because takedown_cpu() and the architecture and
742 * subsystem specific mechanisms are not available. So the CPU
743 * which would be completely unplugged again needs to stay around
744 * in the current state.
745 */
746 return st->state <= CPUHP_BRINGUP_CPU;
747 }
748
749 static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
750 enum cpuhp_state target)
751 {
752 enum cpuhp_state prev_state = st->state;
753 int ret = 0;
754
755 ret = cpuhp_invoke_callback_range(true, cpu, st, target);
756 if (ret) {
757 pr_debug("CPU UP failed (%d) CPU %u state %s (%d)\n",
758 ret, cpu, cpuhp_get_step(st->state)->name,
759 st->state);
760
761 cpuhp_reset_state(cpu, st, prev_state);
762 if (can_rollback_cpu(st))
763 WARN_ON(cpuhp_invoke_callback_range(false, cpu, st,
764 prev_state));
765 }
766 return ret;
767 }
768
769 /*
770 * The cpu hotplug threads manage the bringup and teardown of the cpus
771 */
772 static int cpuhp_should_run(unsigned int cpu)
773 {
774 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
775
776 return st->should_run;
777 }
778
779 /*
780 * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
781 * callbacks when a state gets [un]installed at runtime.
782 *
783 * Each invocation of this function by the smpboot thread does a single AP
784 * state callback.
785 *
786 * It has 3 modes of operation:
787 * - single: runs st->cb_state
788 * - up: runs ++st->state, while st->state < st->target
789 * - down: runs st->state--, while st->state > st->target
790 *
791 * When complete or on error, should_run is cleared and the completion is fired.
792 */
793 static void cpuhp_thread_fun(unsigned int cpu)
794 {
795 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
796 bool bringup = st->bringup;
797 enum cpuhp_state state;
798
799 if (WARN_ON_ONCE(!st->should_run))
800 return;
801
802 /*
803 * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
804 * that if we see ->should_run we also see the rest of the state.
805 */
806 smp_mb();
807
808 /*
809 	 * The BP holds the hotplug lock, but we're now running on the AP;
810 	 * ensure that anybody asserting the lock is held will actually find
811 * it so.
812 */
813 lockdep_acquire_cpus_lock();
814 cpuhp_lock_acquire(bringup);
815
816 if (st->single) {
817 state = st->cb_state;
818 st->should_run = false;
819 } else {
820 st->should_run = cpuhp_next_state(bringup, &state, st, st->target);
821 if (!st->should_run)
822 goto end;
823 }
824
825 WARN_ON_ONCE(!cpuhp_is_ap_state(state));
826
827 if (cpuhp_is_atomic_state(state)) {
828 local_irq_disable();
829 st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
830 local_irq_enable();
831
832 /*
833 * STARTING/DYING must not fail!
834 */
835 WARN_ON_ONCE(st->result);
836 } else {
837 st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
838 }
839
840 if (st->result) {
841 /*
842 		 * If we fail on a rollback, we're up a creek without a
843 		 * paddle, no way forward, no way back. We lose, thanks for
844 * playing.
845 */
846 WARN_ON_ONCE(st->rollback);
847 st->should_run = false;
848 }
849
850 end:
851 cpuhp_lock_release(bringup);
852 lockdep_release_cpus_lock();
853
854 if (!st->should_run)
855 complete_ap_thread(st, bringup);
856 }
857
858 /* Invoke a single callback on a remote cpu */
859 static int
860 cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
861 struct hlist_node *node)
862 {
863 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
864 int ret;
865
866 if (!cpu_online(cpu))
867 return 0;
868
869 cpuhp_lock_acquire(false);
870 cpuhp_lock_release(false);
871
872 cpuhp_lock_acquire(true);
873 cpuhp_lock_release(true);
874
875 /*
876 * If we are up and running, use the hotplug thread. For early calls
877 * we invoke the thread function directly.
878 */
879 if (!st->thread)
880 return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
881
882 st->rollback = false;
883 st->last = NULL;
884
885 st->node = node;
886 st->bringup = bringup;
887 st->cb_state = state;
888 st->single = true;
889
890 __cpuhp_kick_ap(st);
891
892 /*
893 * If we failed and did a partial, do a rollback.
894 */
895 if ((ret = st->result) && st->last) {
896 st->rollback = true;
897 st->bringup = !bringup;
898
899 __cpuhp_kick_ap(st);
900 }
901
902 /*
903 	 * Clean up the leftovers so the next hotplug operation won't use stale
904 * data.
905 */
906 st->node = st->last = NULL;
907 return ret;
908 }
909
910 static int cpuhp_kick_ap_work(unsigned int cpu)
911 {
912 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
913 enum cpuhp_state prev_state = st->state;
914 int ret;
915
916 cpuhp_lock_acquire(false);
917 cpuhp_lock_release(false);
918
919 cpuhp_lock_acquire(true);
920 cpuhp_lock_release(true);
921
922 trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
923 ret = cpuhp_kick_ap(cpu, st, st->target);
924 trace_cpuhp_exit(cpu, st->state, prev_state, ret);
925
926 return ret;
927 }
928
929 static struct smp_hotplug_thread cpuhp_threads = {
930 .store = &cpuhp_state.thread,
931 .thread_should_run = cpuhp_should_run,
932 .thread_fn = cpuhp_thread_fun,
933 .thread_comm = "cpuhp/%u",
934 .selfparking = true,
935 };
936
937 static __init void cpuhp_init_state(void)
938 {
939 struct cpuhp_cpu_state *st;
940 int cpu;
941
942 for_each_possible_cpu(cpu) {
943 st = per_cpu_ptr(&cpuhp_state, cpu);
944 init_completion(&st->done_up);
945 init_completion(&st->done_down);
946 }
947 }
948
949 void __init cpuhp_threads_init(void)
950 {
951 cpuhp_init_state();
952 BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
953 kthread_unpark(this_cpu_read(cpuhp_state.thread));
954 }
955
956 /*
957 *
958 * Serialize hotplug trainwrecks outside of the cpu_hotplug_lock
959 * protected region.
960 *
961 * The operation is still serialized against concurrent CPU hotplug via
962 * cpu_add_remove_lock, i.e. CPU map protection. But it is _not_
963 * serialized against other hotplug related activity like adding or
964 * removing of state callbacks and state instances, which invoke either the
965 * startup or the teardown callback of the affected state.
966 *
967 * This is required for subsystems which are unfixable vs. CPU hotplug and
968 * evade lock inversion problems by scheduling work which has to be
969 * completed _before_ cpu_up()/_cpu_down() returns.
970 *
971 * Don't even think about adding anything to this for any new code or even
972  * drivers. Its only purpose is to keep existing lock order trainwrecks
973 * working.
974 *
975 * For cpu_down() there might be valid reasons to finish cleanups which are
976 * not required to be done under cpu_hotplug_lock, but that's a different
977 * story and would be not invoked via this.
978 */
979 static void cpu_up_down_serialize_trainwrecks(bool tasks_frozen)
980 {
981 /*
982 * cpusets delegate hotplug operations to a worker to "solve" the
983 * lock order problems. Wait for the worker, but only if tasks are
984 * _not_ frozen (suspend, hibernate) as that would wait forever.
985 *
986 * The wait is required because otherwise the hotplug operation
987 * returns with inconsistent state, which could even be observed in
988 * user space when a new CPU is brought up. The CPU plug uevent
989 * would be delivered and user space reacting on it would fail to
990 * move tasks to the newly plugged CPU up to the point where the
991 * work has finished because up to that point the newly plugged CPU
992 * is not assignable in cpusets/cgroups. On unplug that's not
993 * necessarily a visible issue, but it is still inconsistent state,
994 * which is the real problem which needs to be "fixed". This can't
995 * prevent the transient state between scheduling the work and
996 * returning from waiting for it.
997 */
998 if (!tasks_frozen)
999 cpuset_wait_for_hotplug();
1000 }
1001
1002 #ifdef CONFIG_HOTPLUG_CPU
1003 #ifndef arch_clear_mm_cpumask_cpu
1004 #define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm))
1005 #endif
1006
1007 /**
1008 * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
1009 * @cpu: a CPU id
1010 *
1011 * This function walks all processes, finds a valid mm struct for each one and
1012 * then clears a corresponding bit in mm's cpumask. While this all sounds
1013 * trivial, there are various non-obvious corner cases, which this function
1014 * tries to solve in a safe manner.
1015 *
1016 * Also note that the function uses a somewhat relaxed locking scheme, so it may
1017 * be called only for an already offlined CPU.
1018 */
1019 void clear_tasks_mm_cpumask(int cpu)
1020 {
1021 struct task_struct *p;
1022
1023 /*
1024 * This function is called after the cpu is taken down and marked
1025 	 * offline, so it's not like new tasks will ever get this cpu set in
1026 * their mm mask. -- Peter Zijlstra
1027 * Thus, we may use rcu_read_lock() here, instead of grabbing
1028 * full-fledged tasklist_lock.
1029 */
1030 WARN_ON(cpu_online(cpu));
1031 rcu_read_lock();
1032 for_each_process(p) {
1033 struct task_struct *t;
1034
1035 /*
1036 * Main thread might exit, but other threads may still have
1037 * a valid mm. Find one.
1038 */
1039 t = find_lock_task_mm(p);
1040 if (!t)
1041 continue;
1042 arch_clear_mm_cpumask_cpu(cpu, t->mm);
1043 task_unlock(t);
1044 }
1045 rcu_read_unlock();
1046 }
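/*
 * Illustrative sketch, not lifted from any particular architecture: the
 * arch teardown path is expected to call this only after the CPU has been
 * marked offline, roughly:
 *
 *	set_cpu_online(cpu, false);
 *	...
 *	clear_tasks_mm_cpumask(cpu);	// CPU can no longer be set in mm masks
 */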
1047
1048 /* Take this CPU down. */
1049 static int take_cpu_down(void *_param)
1050 {
1051 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1052 enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
1053 int err, cpu = smp_processor_id();
1054
1055 /* Ensure this CPU doesn't handle any more interrupts. */
1056 err = __cpu_disable();
1057 if (err < 0)
1058 return err;
1059
1060 /*
1061 * Must be called from CPUHP_TEARDOWN_CPU, which means, as we are going
1062 * down, that the current state is CPUHP_TEARDOWN_CPU - 1.
1063 */
1064 WARN_ON(st->state != (CPUHP_TEARDOWN_CPU - 1));
1065
1066 /*
1067 * Invoke the former CPU_DYING callbacks. DYING must not fail!
1068 */
1069 cpuhp_invoke_callback_range_nofail(false, cpu, st, target);
1070
1071 /* Give up timekeeping duties */
1072 tick_handover_do_timer();
1073 /* Remove CPU from timer broadcasting */
1074 tick_offline_cpu(cpu);
1075 /* Park the stopper thread */
1076 stop_machine_park(cpu);
1077 return 0;
1078 }
1079
1080 static int takedown_cpu(unsigned int cpu)
1081 {
1082 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1083 int err;
1084
1085 /* Park the smpboot threads */
1086 kthread_park(st->thread);
1087
1088 /*
1089 * Prevent irq alloc/free while the dying cpu reorganizes the
1090 * interrupt affinities.
1091 */
1092 irq_lock_sparse();
1093
1094 /*
1095 * So now all preempt/rcu users must observe !cpu_active().
1096 */
1097 err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
1098 if (err) {
1099 /* CPU refused to die */
1100 irq_unlock_sparse();
1101 /* Unpark the hotplug thread so we can rollback there */
1102 kthread_unpark(st->thread);
1103 return err;
1104 }
1105 BUG_ON(cpu_online(cpu));
1106
1107 /*
1108 * The teardown callback for CPUHP_AP_SCHED_STARTING will have removed
1109 * all runnable tasks from the CPU, there's only the idle task left now
1110 * that the migration thread is done doing the stop_machine thing.
1111 *
1112 * Wait for the stop thread to go away.
1113 */
1114 wait_for_ap_thread(st, false);
1115 BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
1116
1117 /* Interrupts are moved away from the dying cpu, reenable alloc/free */
1118 irq_unlock_sparse();
1119
1120 hotplug_cpu__broadcast_tick_pull(cpu);
1121 /* This actually kills the CPU. */
1122 __cpu_die(cpu);
1123
1124 tick_cleanup_dead_cpu(cpu);
1125 rcutree_migrate_callbacks(cpu);
1126 return 0;
1127 }
1128
1129 static void cpuhp_complete_idle_dead(void *arg)
1130 {
1131 struct cpuhp_cpu_state *st = arg;
1132
1133 complete_ap_thread(st, false);
1134 }
1135
1136 void cpuhp_report_idle_dead(void)
1137 {
1138 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1139
1140 BUG_ON(st->state != CPUHP_AP_OFFLINE);
1141 rcu_report_dead(smp_processor_id());
1142 st->state = CPUHP_AP_IDLE_DEAD;
1143 /*
1144 * We cannot call complete after rcu_report_dead() so we delegate it
1145 * to an online cpu.
1146 */
1147 smp_call_function_single(cpumask_first(cpu_online_mask),
1148 cpuhp_complete_idle_dead, st, 0);
1149 }
1150
1151 static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
1152 enum cpuhp_state target)
1153 {
1154 enum cpuhp_state prev_state = st->state;
1155 int ret = 0;
1156
1157 ret = cpuhp_invoke_callback_range(false, cpu, st, target);
1158 if (ret) {
1159 pr_debug("CPU DOWN failed (%d) CPU %u state %s (%d)\n",
1160 ret, cpu, cpuhp_get_step(st->state)->name,
1161 st->state);
1162
1163 cpuhp_reset_state(cpu, st, prev_state);
1164
1165 if (st->state < prev_state)
1166 WARN_ON(cpuhp_invoke_callback_range(true, cpu, st,
1167 prev_state));
1168 }
1169
1170 return ret;
1171 }
1172
1173 /* Requires cpu_add_remove_lock to be held */
1174 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
1175 enum cpuhp_state target)
1176 {
1177 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1178 int prev_state, ret = 0;
1179
1180 if (num_online_cpus() == 1)
1181 return -EBUSY;
1182
1183 if (!cpu_present(cpu))
1184 return -EINVAL;
1185
1186 cpus_write_lock();
1187
1188 cpuhp_tasks_frozen = tasks_frozen;
1189
1190 prev_state = cpuhp_set_state(cpu, st, target);
1191 /*
1192 * If the current CPU state is in the range of the AP hotplug thread,
1193 * then we need to kick the thread.
1194 */
1195 if (st->state > CPUHP_TEARDOWN_CPU) {
1196 st->target = max((int)target, CPUHP_TEARDOWN_CPU);
1197 ret = cpuhp_kick_ap_work(cpu);
1198 /*
1199 * The AP side has done the error rollback already. Just
1200 * return the error code..
1201 */
1202 if (ret)
1203 goto out;
1204
1205 /*
1206 * We might have stopped still in the range of the AP hotplug
1207 * thread. Nothing to do anymore.
1208 */
1209 if (st->state > CPUHP_TEARDOWN_CPU)
1210 goto out;
1211
1212 st->target = target;
1213 }
1214 /*
1215 * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
1216 * to do the further cleanups.
1217 */
1218 ret = cpuhp_down_callbacks(cpu, st, target);
1219 if (ret && st->state < prev_state) {
1220 if (st->state == CPUHP_TEARDOWN_CPU) {
1221 cpuhp_reset_state(cpu, st, prev_state);
1222 __cpuhp_kick_ap(st);
1223 } else {
1224 WARN(1, "DEAD callback error for CPU%d", cpu);
1225 }
1226 }
1227
1228 out:
1229 cpus_write_unlock();
1230 /*
1231 * Do post unplug cleanup. This is still protected against
1232 * concurrent CPU hotplug via cpu_add_remove_lock.
1233 */
1234 lockup_detector_cleanup();
1235 arch_smt_update();
1236 cpu_up_down_serialize_trainwrecks(tasks_frozen);
1237 return ret;
1238 }
1239
1240 struct cpu_down_work {
1241 unsigned int cpu;
1242 enum cpuhp_state target;
1243 };
1244
1245 static long __cpu_down_maps_locked(void *arg)
1246 {
1247 struct cpu_down_work *work = arg;
1248
1249 return _cpu_down(work->cpu, 0, work->target);
1250 }
1251
1252 static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
1253 {
1254 struct cpu_down_work work = { .cpu = cpu, .target = target, };
1255
1256 /*
1257 * If the platform does not support hotplug, report it explicitly to
1258 * differentiate it from a transient offlining failure.
1259 */
1260 if (cc_platform_has(CC_ATTR_HOTPLUG_DISABLED))
1261 return -EOPNOTSUPP;
1262 if (cpu_hotplug_disabled)
1263 return -EBUSY;
1264
1265 /*
1266 * Ensure that the control task does not run on the to be offlined
1267 * CPU to prevent a deadlock against cfs_b->period_timer.
1268 * Also keep at least one housekeeping cpu onlined to avoid generating
1269 * an empty sched_domain span.
1270 */
1271 for_each_cpu_and(cpu, cpu_online_mask, housekeeping_cpumask(HK_TYPE_DOMAIN)) {
1272 if (cpu != work.cpu)
1273 return work_on_cpu(cpu, __cpu_down_maps_locked, &work);
1274 }
1275 return -EBUSY;
1276 }
1277
1278 static int cpu_down(unsigned int cpu, enum cpuhp_state target)
1279 {
1280 int err;
1281
1282 cpu_maps_update_begin();
1283 err = cpu_down_maps_locked(cpu, target);
1284 cpu_maps_update_done();
1285 return err;
1286 }
1287
1288 /**
1289 * cpu_device_down - Bring down a cpu device
1290 * @dev: Pointer to the cpu device to offline
1291 *
1292 * This function is meant to be used by device core cpu subsystem only.
1293 *
1294 * Other subsystems should use remove_cpu() instead.
1295 *
1296 * Return: %0 on success or a negative errno code
1297 */
1298 int cpu_device_down(struct device *dev)
1299 {
1300 return cpu_down(dev->id, CPUHP_OFFLINE);
1301 }
1302
1303 int remove_cpu(unsigned int cpu)
1304 {
1305 int ret;
1306
1307 lock_device_hotplug();
1308 ret = device_offline(get_cpu_device(cpu));
1309 unlock_device_hotplug();
1310
1311 return ret;
1312 }
1313 EXPORT_SYMBOL_GPL(remove_cpu);
1314
1315 void smp_shutdown_nonboot_cpus(unsigned int primary_cpu)
1316 {
1317 unsigned int cpu;
1318 int error;
1319
1320 cpu_maps_update_begin();
1321
1322 /*
1323 * Make certain the cpu I'm about to reboot on is online.
1324 *
1325 	 * This is in line with what migrate_to_reboot_cpu() already does.
1326 */
1327 if (!cpu_online(primary_cpu))
1328 primary_cpu = cpumask_first(cpu_online_mask);
1329
1330 for_each_online_cpu(cpu) {
1331 if (cpu == primary_cpu)
1332 continue;
1333
1334 error = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
1335 if (error) {
1336 pr_err("Failed to offline CPU%d - error=%d",
1337 cpu, error);
1338 break;
1339 }
1340 }
1341
1342 /*
1343 * Ensure all but the reboot CPU are offline.
1344 */
1345 BUG_ON(num_online_cpus() > 1);
1346
1347 /*
1348 * Make sure the CPUs won't be enabled by someone else after this
1349 * point. Kexec will reboot to a new kernel shortly resetting
1350 * everything along the way.
1351 */
1352 cpu_hotplug_disabled++;
1353
1354 cpu_maps_update_done();
1355 }
1356
1357 #else
1358 #define takedown_cpu NULL
1359 #endif /*CONFIG_HOTPLUG_CPU*/
1360
1361 /**
1362 * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
1363 * @cpu: cpu that just started
1364 *
1365 * It must be called by the arch code on the new cpu, before the new cpu
1366 * enables interrupts and before the "boot" cpu returns from __cpu_up().
1367 */
1368 void notify_cpu_starting(unsigned int cpu)
1369 {
1370 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1371 enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
1372
1373 rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
1374 cpumask_set_cpu(cpu, &cpus_booted_once_mask);
1375
1376 /*
1377 * STARTING must not fail!
1378 */
1379 cpuhp_invoke_callback_range_nofail(true, cpu, st, target);
1380 }
1381
1382 /*
1383 * Called from the idle task. Wake up the controlling task which brings the
1384 * hotplug thread of the upcoming CPU up and then delegates the rest of the
1385 * online bringup to the hotplug thread.
1386 */
1387 void cpuhp_online_idle(enum cpuhp_state state)
1388 {
1389 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1390
1391 /* Happens for the boot cpu */
1392 if (state != CPUHP_AP_ONLINE_IDLE)
1393 return;
1394
1395 /*
1396 	 * Unpark the stopper thread before we start the idle loop (and start
1397 * scheduling); this ensures the stopper task is always available.
1398 */
1399 stop_machine_unpark(smp_processor_id());
1400
1401 st->state = CPUHP_AP_ONLINE_IDLE;
1402 complete_ap_thread(st, true);
1403 }
1404
1405 /* Requires cpu_add_remove_lock to be held */
1406 static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
1407 {
1408 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1409 struct task_struct *idle;
1410 int ret = 0;
1411
1412 cpus_write_lock();
1413
1414 if (!cpu_present(cpu)) {
1415 ret = -EINVAL;
1416 goto out;
1417 }
1418
1419 /*
1420 * The caller of cpu_up() might have raced with another
1421 * caller. Nothing to do.
1422 */
1423 if (st->state >= target)
1424 goto out;
1425
1426 if (st->state == CPUHP_OFFLINE) {
1427 /* Let it fail before we try to bring the cpu up */
1428 idle = idle_thread_get(cpu);
1429 if (IS_ERR(idle)) {
1430 ret = PTR_ERR(idle);
1431 goto out;
1432 }
1433 }
1434
1435 cpuhp_tasks_frozen = tasks_frozen;
1436
1437 cpuhp_set_state(cpu, st, target);
1438 /*
1439 * If the current CPU state is in the range of the AP hotplug thread,
1440 * then we need to kick the thread once more.
1441 */
1442 if (st->state > CPUHP_BRINGUP_CPU) {
1443 ret = cpuhp_kick_ap_work(cpu);
1444 /*
1445 * The AP side has done the error rollback already. Just
1446 * return the error code..
1447 */
1448 if (ret)
1449 goto out;
1450 }
1451
1452 /*
1453 * Try to reach the target state. We max out on the BP at
1454 * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
1455 * responsible for bringing it up to the target state.
1456 */
1457 target = min((int)target, CPUHP_BRINGUP_CPU);
1458 ret = cpuhp_up_callbacks(cpu, st, target);
1459 out:
1460 cpus_write_unlock();
1461 arch_smt_update();
1462 cpu_up_down_serialize_trainwrecks(tasks_frozen);
1463 return ret;
1464 }
1465
1466 static int cpu_up(unsigned int cpu, enum cpuhp_state target)
1467 {
1468 int err = 0;
1469
1470 if (!cpu_possible(cpu)) {
1471 pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
1472 cpu);
1473 #if defined(CONFIG_IA64)
1474 pr_err("please check additional_cpus= boot parameter\n");
1475 #endif
1476 return -EINVAL;
1477 }
1478
1479 err = try_online_node(cpu_to_node(cpu));
1480 if (err)
1481 return err;
1482
1483 cpu_maps_update_begin();
1484
1485 if (cpu_hotplug_disabled) {
1486 err = -EBUSY;
1487 goto out;
1488 }
1489 if (!cpu_bootable(cpu)) {
1490 err = -EPERM;
1491 goto out;
1492 }
1493
1494 err = _cpu_up(cpu, 0, target);
1495 out:
1496 cpu_maps_update_done();
1497 return err;
1498 }
1499
1500 /**
1501 * cpu_device_up - Bring up a cpu device
1502 * @dev: Pointer to the cpu device to online
1503 *
1504 * This function is meant to be used by device core cpu subsystem only.
1505 *
1506 * Other subsystems should use add_cpu() instead.
1507 *
1508 * Return: %0 on success or a negative errno code
1509 */
1510 int cpu_device_up(struct device *dev)
1511 {
1512 return cpu_up(dev->id, CPUHP_ONLINE);
1513 }
1514
1515 int add_cpu(unsigned int cpu)
1516 {
1517 int ret;
1518
1519 lock_device_hotplug();
1520 ret = device_online(get_cpu_device(cpu));
1521 unlock_device_hotplug();
1522
1523 return ret;
1524 }
1525 EXPORT_SYMBOL_GPL(add_cpu);
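/*
 * Hedged usage sketch for subsystems outside the cpu device core: offline a
 * CPU and bring it back through the device layer. Error handling is trimmed
 * for brevity.
 *
 *	ret = remove_cpu(cpu);		// device_offline() under device_hotplug lock
 *	if (!ret)
 *		ret = add_cpu(cpu);	// device_online() brings it back
 */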
1526
1527 /**
1528 * bringup_hibernate_cpu - Bring up the CPU that we hibernated on
1529 * @sleep_cpu: The cpu we hibernated on and should be brought up.
1530 *
1531 * On some architectures like arm64, we can hibernate on any CPU, but on
1532 * wake up the CPU we hibernated on might be offline as a side effect of
1533 * using maxcpus= for example.
1534 *
1535 * Return: %0 on success or a negative errno code
1536 */
1537 int bringup_hibernate_cpu(unsigned int sleep_cpu)
1538 {
1539 int ret;
1540
1541 if (!cpu_online(sleep_cpu)) {
1542 pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n");
1543 ret = cpu_up(sleep_cpu, CPUHP_ONLINE);
1544 if (ret) {
1545 pr_err("Failed to bring hibernate-CPU up!\n");
1546 return ret;
1547 }
1548 }
1549 return 0;
1550 }
1551
1552 void bringup_nonboot_cpus(unsigned int setup_max_cpus)
1553 {
1554 unsigned int cpu;
1555
1556 for_each_present_cpu(cpu) {
1557 if (num_online_cpus() >= setup_max_cpus)
1558 break;
1559 if (!cpu_online(cpu))
1560 cpu_up(cpu, CPUHP_ONLINE);
1561 }
1562 }
1563
1564 #ifdef CONFIG_PM_SLEEP_SMP
1565 static cpumask_var_t frozen_cpus;
1566
1567 int freeze_secondary_cpus(int primary)
1568 {
1569 int cpu, error = 0;
1570
1571 cpu_maps_update_begin();
1572 if (primary == -1) {
1573 primary = cpumask_first(cpu_online_mask);
1574 if (!housekeeping_cpu(primary, HK_TYPE_TIMER))
1575 primary = housekeeping_any_cpu(HK_TYPE_TIMER);
1576 } else {
1577 if (!cpu_online(primary))
1578 primary = cpumask_first(cpu_online_mask);
1579 }
1580
1581 /*
1582 * We take down all of the non-boot CPUs in one shot to avoid races
1583 	 * with userspace trying to use CPU hotplug at the same time.
1584 */
1585 cpumask_clear(frozen_cpus);
1586
1587 pr_info("Disabling non-boot CPUs ...\n");
1588 for_each_online_cpu(cpu) {
1589 if (cpu == primary)
1590 continue;
1591
1592 if (pm_wakeup_pending()) {
1593 pr_info("Wakeup pending. Abort CPU freeze\n");
1594 error = -EBUSY;
1595 break;
1596 }
1597
1598 trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
1599 error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
1600 trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
1601 if (!error)
1602 cpumask_set_cpu(cpu, frozen_cpus);
1603 else {
1604 pr_err("Error taking CPU%d down: %d\n", cpu, error);
1605 break;
1606 }
1607 }
1608
1609 if (!error)
1610 BUG_ON(num_online_cpus() > 1);
1611 else
1612 pr_err("Non-boot CPUs are not disabled\n");
1613
1614 /*
1615 * Make sure the CPUs won't be enabled by someone else. We need to do
1616 * this even in case of failure as all freeze_secondary_cpus() users are
1617 * supposed to do thaw_secondary_cpus() on the failure path.
1618 */
1619 cpu_hotplug_disabled++;
1620
1621 cpu_maps_update_done();
1622 return error;
1623 }
1624
1625 void __weak arch_thaw_secondary_cpus_begin(void)
1626 {
1627 }
1628
1629 void __weak arch_thaw_secondary_cpus_end(void)
1630 {
1631 }
1632
1633 void thaw_secondary_cpus(void)
1634 {
1635 int cpu, error;
1636
1637 /* Allow everyone to use the CPU hotplug again */
1638 cpu_maps_update_begin();
1639 __cpu_hotplug_enable();
1640 if (cpumask_empty(frozen_cpus))
1641 goto out;
1642
1643 pr_info("Enabling non-boot CPUs ...\n");
1644
1645 arch_thaw_secondary_cpus_begin();
1646
1647 for_each_cpu(cpu, frozen_cpus) {
1648 trace_suspend_resume(TPS("CPU_ON"), cpu, true);
1649 error = _cpu_up(cpu, 1, CPUHP_ONLINE);
1650 trace_suspend_resume(TPS("CPU_ON"), cpu, false);
1651 if (!error) {
1652 pr_info("CPU%d is up\n", cpu);
1653 continue;
1654 }
1655 pr_warn("Error taking CPU%d up: %d\n", cpu, error);
1656 }
1657
1658 arch_thaw_secondary_cpus_end();
1659
1660 cpumask_clear(frozen_cpus);
1661 out:
1662 cpu_maps_update_done();
1663 }
1664
1665 static int __init alloc_frozen_cpus(void)
1666 {
1667 if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
1668 return -ENOMEM;
1669 return 0;
1670 }
1671 core_initcall(alloc_frozen_cpus);
1672
1673 /*
1674 * When callbacks for CPU hotplug notifications are being executed, we must
1675 * ensure that the state of the system with respect to the tasks being frozen
1676 * or not, as reported by the notification, remains unchanged *throughout the
1677 * duration* of the execution of the callbacks.
1678 * Hence we need to prevent the freezer from racing with regular CPU hotplug.
1679 *
1680 * This synchronization is implemented by mutually excluding regular CPU
1681 * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
1682 * Hibernate notifications.
1683 */
1684 static int
1685 cpu_hotplug_pm_callback(struct notifier_block *nb,
1686 unsigned long action, void *ptr)
1687 {
1688 switch (action) {
1689
1690 case PM_SUSPEND_PREPARE:
1691 case PM_HIBERNATION_PREPARE:
1692 cpu_hotplug_disable();
1693 break;
1694
1695 case PM_POST_SUSPEND:
1696 case PM_POST_HIBERNATION:
1697 cpu_hotplug_enable();
1698 break;
1699
1700 default:
1701 return NOTIFY_DONE;
1702 }
1703
1704 return NOTIFY_OK;
1705 }
1706
1707
1708 static int __init cpu_hotplug_pm_sync_init(void)
1709 {
1710 /*
1711 * cpu_hotplug_pm_callback has higher priority than x86
1712 	 * bsp_pm_callback, which depends on cpu_hotplug_pm_callback
1713 	 * to disable cpu hotplug and avoid a cpu hotplug race.
1714 */
1715 pm_notifier(cpu_hotplug_pm_callback, 0);
1716 return 0;
1717 }
1718 core_initcall(cpu_hotplug_pm_sync_init);
1719
1720 #endif /* CONFIG_PM_SLEEP_SMP */
1721
1722 int __boot_cpu_id;
1723
1724 #endif /* CONFIG_SMP */
1725
1726 /* Boot processor state steps */
1727 static struct cpuhp_step cpuhp_hp_states[] = {
1728 [CPUHP_OFFLINE] = {
1729 .name = "offline",
1730 .startup.single = NULL,
1731 .teardown.single = NULL,
1732 },
1733 #ifdef CONFIG_SMP
1734 [CPUHP_CREATE_THREADS]= {
1735 .name = "threads:prepare",
1736 .startup.single = smpboot_create_threads,
1737 .teardown.single = NULL,
1738 .cant_stop = true,
1739 },
1740 [CPUHP_PERF_PREPARE] = {
1741 .name = "perf:prepare",
1742 .startup.single = perf_event_init_cpu,
1743 .teardown.single = perf_event_exit_cpu,
1744 },
1745 [CPUHP_RANDOM_PREPARE] = {
1746 .name = "random:prepare",
1747 .startup.single = random_prepare_cpu,
1748 .teardown.single = NULL,
1749 },
1750 [CPUHP_WORKQUEUE_PREP] = {
1751 .name = "workqueue:prepare",
1752 .startup.single = workqueue_prepare_cpu,
1753 .teardown.single = NULL,
1754 },
1755 [CPUHP_HRTIMERS_PREPARE] = {
1756 .name = "hrtimers:prepare",
1757 .startup.single = hrtimers_prepare_cpu,
1758 .teardown.single = hrtimers_dead_cpu,
1759 },
1760 [CPUHP_SMPCFD_PREPARE] = {
1761 .name = "smpcfd:prepare",
1762 .startup.single = smpcfd_prepare_cpu,
1763 .teardown.single = smpcfd_dead_cpu,
1764 },
1765 [CPUHP_RELAY_PREPARE] = {
1766 .name = "relay:prepare",
1767 .startup.single = relay_prepare_cpu,
1768 .teardown.single = NULL,
1769 },
1770 [CPUHP_SLAB_PREPARE] = {
1771 .name = "slab:prepare",
1772 .startup.single = slab_prepare_cpu,
1773 .teardown.single = slab_dead_cpu,
1774 },
1775 [CPUHP_RCUTREE_PREP] = {
1776 .name = "RCU/tree:prepare",
1777 .startup.single = rcutree_prepare_cpu,
1778 .teardown.single = rcutree_dead_cpu,
1779 },
1780 /*
1781 * On the tear-down path, timers_dead_cpu() must be invoked
1782 * before blk_mq_queue_reinit_notify() from notify_dead(),
1783 * otherwise a RCU stall occurs.
1784 */
1785 [CPUHP_TIMERS_PREPARE] = {
1786 .name = "timers:prepare",
1787 .startup.single = timers_prepare_cpu,
1788 .teardown.single = timers_dead_cpu,
1789 },
1790 /* Kicks the plugged cpu into life */
1791 [CPUHP_BRINGUP_CPU] = {
1792 .name = "cpu:bringup",
1793 .startup.single = bringup_cpu,
1794 .teardown.single = finish_cpu,
1795 .cant_stop = true,
1796 },
1797 /* Final state before CPU kills itself */
1798 [CPUHP_AP_IDLE_DEAD] = {
1799 .name = "idle:dead",
1800 },
1801 /*
1802 * Last state before CPU enters the idle loop to die. Transient state
1803 * for synchronization.
1804 */
1805 [CPUHP_AP_OFFLINE] = {
1806 .name = "ap:offline",
1807 .cant_stop = true,
1808 },
1809 /* First state is scheduler control. Interrupts are disabled */
1810 [CPUHP_AP_SCHED_STARTING] = {
1811 .name = "sched:starting",
1812 .startup.single = sched_cpu_starting,
1813 .teardown.single = sched_cpu_dying,
1814 },
1815 [CPUHP_AP_RCUTREE_DYING] = {
1816 .name = "RCU/tree:dying",
1817 .startup.single = NULL,
1818 .teardown.single = rcutree_dying_cpu,
1819 },
1820 [CPUHP_AP_SMPCFD_DYING] = {
1821 .name = "smpcfd:dying",
1822 .startup.single = NULL,
1823 .teardown.single = smpcfd_dying_cpu,
1824 },
1825 /* Entry state on starting. Interrupts enabled from here on. Transient
1826 	 * state for synchronization */
1827 [CPUHP_AP_ONLINE] = {
1828 .name = "ap:online",
1829 },
1830 /*
1831 * Handled on control processor until the plugged processor manages
1832 * this itself.
1833 */
1834 [CPUHP_TEARDOWN_CPU] = {
1835 .name = "cpu:teardown",
1836 .startup.single = NULL,
1837 .teardown.single = takedown_cpu,
1838 .cant_stop = true,
1839 },
1840
1841 [CPUHP_AP_SCHED_WAIT_EMPTY] = {
1842 .name = "sched:waitempty",
1843 .startup.single = NULL,
1844 .teardown.single = sched_cpu_wait_empty,
1845 },
1846
1847 /* Handle smpboot threads park/unpark */
1848 [CPUHP_AP_SMPBOOT_THREADS] = {
1849 .name = "smpboot/threads:online",
1850 .startup.single = smpboot_unpark_threads,
1851 .teardown.single = smpboot_park_threads,
1852 },
1853 [CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
1854 .name = "irq/affinity:online",
1855 .startup.single = irq_affinity_online_cpu,
1856 .teardown.single = NULL,
1857 },
1858 [CPUHP_AP_PERF_ONLINE] = {
1859 .name = "perf:online",
1860 .startup.single = perf_event_init_cpu,
1861 .teardown.single = perf_event_exit_cpu,
1862 },
1863 [CPUHP_AP_WATCHDOG_ONLINE] = {
1864 .name = "lockup_detector:online",
1865 .startup.single = lockup_detector_online_cpu,
1866 .teardown.single = lockup_detector_offline_cpu,
1867 },
1868 [CPUHP_AP_WORKQUEUE_ONLINE] = {
1869 .name = "workqueue:online",
1870 .startup.single = workqueue_online_cpu,
1871 .teardown.single = workqueue_offline_cpu,
1872 },
1873 [CPUHP_AP_RANDOM_ONLINE] = {
1874 .name = "random:online",
1875 .startup.single = random_online_cpu,
1876 .teardown.single = NULL,
1877 },
1878 [CPUHP_AP_RCUTREE_ONLINE] = {
1879 .name = "RCU/tree:online",
1880 .startup.single = rcutree_online_cpu,
1881 .teardown.single = rcutree_offline_cpu,
1882 },
1883 #endif
1884 /*
1885 * The dynamically registered state space is here
1886 */
1887
1888 #ifdef CONFIG_SMP
1889 /* Last state is scheduler control setting the cpu active */
1890 [CPUHP_AP_ACTIVE] = {
1891 .name = "sched:active",
1892 .startup.single = sched_cpu_activate,
1893 .teardown.single = sched_cpu_deactivate,
1894 },
1895 #endif
1896
1897 /* CPU is fully up and running. */
1898 [CPUHP_ONLINE] = {
1899 .name = "online",
1900 .startup.single = NULL,
1901 .teardown.single = NULL,
1902 },
1903 };
1904
1905 /* Sanity check for callbacks */
1906 static int cpuhp_cb_check(enum cpuhp_state state)
1907 {
1908 if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
1909 return -EINVAL;
1910 return 0;
1911 }
1912
1913 /*
1914  * Return a free slot in the requested dynamic range (online or prepare).
1915  * The states are protected by the cpuhp_state_mutex and an empty slot is
1916  * identified by having no name assigned.
1917 */
1918 static int cpuhp_reserve_state(enum cpuhp_state state)
1919 {
1920 enum cpuhp_state i, end;
1921 struct cpuhp_step *step;
1922
1923 switch (state) {
1924 case CPUHP_AP_ONLINE_DYN:
1925 step = cpuhp_hp_states + CPUHP_AP_ONLINE_DYN;
1926 end = CPUHP_AP_ONLINE_DYN_END;
1927 break;
1928 case CPUHP_BP_PREPARE_DYN:
1929 step = cpuhp_hp_states + CPUHP_BP_PREPARE_DYN;
1930 end = CPUHP_BP_PREPARE_DYN_END;
1931 break;
1932 default:
1933 return -EINVAL;
1934 }
1935
1936 for (i = state; i <= end; i++, step++) {
1937 if (!step->name)
1938 return i;
1939 }
1940 WARN(1, "No more dynamic states available for CPU hotplug\n");
1941 return -ENOSPC;
1942 }
1943
1944 static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
1945 int (*startup)(unsigned int cpu),
1946 int (*teardown)(unsigned int cpu),
1947 bool multi_instance)
1948 {
1949 /* (Un)Install the callbacks for further cpu hotplug operations */
1950 struct cpuhp_step *sp;
1951 int ret = 0;
1952
1953 /*
1954 * If name is NULL, then the state gets removed.
1955 *
1956 * CPUHP_AP_ONLINE_DYN and CPUHP_BP_PREPARE_DYN are handed out on
1957 * the first allocation from these dynamic ranges, so the removal
1958 * would trigger a new allocation and clear the wrong (already
1959	 * empty) state, leaving the callbacks of the to-be-cleared state
1960 * dangling, which causes wreckage on the next hotplug operation.
1961 */
1962 if (name && (state == CPUHP_AP_ONLINE_DYN ||
1963 state == CPUHP_BP_PREPARE_DYN)) {
1964 ret = cpuhp_reserve_state(state);
1965 if (ret < 0)
1966 return ret;
1967 state = ret;
1968 }
1969 sp = cpuhp_get_step(state);
1970 if (name && sp->name)
1971 return -EBUSY;
1972
1973 sp->startup.single = startup;
1974 sp->teardown.single = teardown;
1975 sp->name = name;
1976 sp->multi_instance = multi_instance;
1977 INIT_HLIST_HEAD(&sp->list);
1978 return ret;
1979 }
1980
1981 static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
1982 {
1983 return cpuhp_get_step(state)->teardown.single;
1984 }
1985
1986 /*
1987 * Call the startup/teardown function for a step either on the AP or
1988 * on the current CPU.
1989 */
1990 static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
1991 struct hlist_node *node)
1992 {
1993 struct cpuhp_step *sp = cpuhp_get_step(state);
1994 int ret;
1995
1996 /*
1997	 * If there's nothing to do, we're done.
1998 * Relies on the union for multi_instance.
1999 */
2000 if (cpuhp_step_empty(bringup, sp))
2001 return 0;
2002 /*
2003	 * The non-AP-bound callbacks can fail on bringup. On teardown,
2004	 * e.g. during module removal, we crash for now.
2005 */
2006 #ifdef CONFIG_SMP
2007 if (cpuhp_is_ap_state(state))
2008 ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
2009 else
2010 ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
2011 #else
2012 ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
2013 #endif
2014 BUG_ON(ret && !bringup);
2015 return ret;
2016 }
2017
2018 /*
2019  * Called from the state setup and instance add paths on a recoverable failure.
2020 *
2021 * Note: The teardown callbacks for rollback are not allowed to fail!
2022 */
2023 static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
2024 struct hlist_node *node)
2025 {
2026 int cpu;
2027
2028 /* Roll back the already executed steps on the other cpus */
2029 for_each_present_cpu(cpu) {
2030 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2031 int cpustate = st->state;
2032
2033 if (cpu >= failedcpu)
2034 break;
2035
2036		/* Did we invoke the startup call on that cpu? */
2037 if (cpustate >= state)
2038 cpuhp_issue_call(cpu, state, false, node);
2039 }
2040 }
2041
2042 int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
2043 struct hlist_node *node,
2044 bool invoke)
2045 {
2046 struct cpuhp_step *sp;
2047 int cpu;
2048 int ret;
2049
2050 lockdep_assert_cpus_held();
2051
2052 sp = cpuhp_get_step(state);
2053 if (sp->multi_instance == false)
2054 return -EINVAL;
2055
2056 mutex_lock(&cpuhp_state_mutex);
2057
2058 if (!invoke || !sp->startup.multi)
2059 goto add_node;
2060
2061 /*
2062 * Try to call the startup callback for each present cpu
2063 * depending on the hotplug state of the cpu.
2064 */
2065 for_each_present_cpu(cpu) {
2066 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2067 int cpustate = st->state;
2068
2069 if (cpustate < state)
2070 continue;
2071
2072 ret = cpuhp_issue_call(cpu, state, true, node);
2073 if (ret) {
2074 if (sp->teardown.multi)
2075 cpuhp_rollback_install(cpu, state, node);
2076 goto unlock;
2077 }
2078 }
2079 add_node:
2080 ret = 0;
2081 hlist_add_head(node, &sp->list);
2082 unlock:
2083 mutex_unlock(&cpuhp_state_mutex);
2084 return ret;
2085 }
2086
2087 int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
2088 bool invoke)
2089 {
2090 int ret;
2091
2092 cpus_read_lock();
2093 ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
2094 cpus_read_unlock();
2095 return ret;
2096 }
2097 EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
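
/*
 * Hedged usage sketch (hypothetical subsystem; the example_* names are made
 * up and nothing below is used by this file): multi-instance states are
 * normally reached through the cpuhp_setup_state_multi() and
 * cpuhp_state_add_instance() wrappers from <linux/cpuhotplug.h>, which end up
 * in the functions above.
 */
struct example_ctx {
	struct hlist_node node;		/* linked into cpuhp_step::list */
};

static int example_ctx_online(unsigned int cpu, struct hlist_node *node)
{
	/* per-instance, per-CPU bringup work would go here */
	return 0;
}

static int __maybe_unused example_multi_init(struct example_ctx *ctx)
{
	int state;

	state = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "example:online",
					example_ctx_online, NULL);
	if (state < 0)
		return state;

	/* runs example_ctx_online() on each CPU that already reached the state */
	return cpuhp_state_add_instance(state, &ctx->node);
}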
2098
2099 /**
2100  * __cpuhp_setup_state_cpuslocked - Set up the callbacks for a hotplug machine state
2101 * @state: The state to setup
2102 * @name: Name of the step
2103 * @invoke: If true, the startup function is invoked for cpus where
2104 * cpu state >= @state
2105 * @startup: startup callback function
2106 * @teardown: teardown callback function
2107 * @multi_instance: State is set up for multiple instances which get
2108 * added afterwards.
2109 *
2110 * The caller needs to hold cpus read locked while calling this function.
2111 * Return:
2112 * On success:
2113 * Positive state number if @state is CPUHP_AP_ONLINE_DYN;
2114 * 0 for all other states
2115 * On failure: proper (negative) error code
2116 */
2117 int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
2118 const char *name, bool invoke,
2119 int (*startup)(unsigned int cpu),
2120 int (*teardown)(unsigned int cpu),
2121 bool multi_instance)
2122 {
2123 int cpu, ret = 0;
2124 bool dynstate;
2125
2126 lockdep_assert_cpus_held();
2127
2128 if (cpuhp_cb_check(state) || !name)
2129 return -EINVAL;
2130
2131 mutex_lock(&cpuhp_state_mutex);
2132
2133 ret = cpuhp_store_callbacks(state, name, startup, teardown,
2134 multi_instance);
2135
2136 dynstate = state == CPUHP_AP_ONLINE_DYN;
2137 if (ret > 0 && dynstate) {
2138 state = ret;
2139 ret = 0;
2140 }
2141
2142 if (ret || !invoke || !startup)
2143 goto out;
2144
2145 /*
2146 * Try to call the startup callback for each present cpu
2147 * depending on the hotplug state of the cpu.
2148 */
2149 for_each_present_cpu(cpu) {
2150 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2151 int cpustate = st->state;
2152
2153 if (cpustate < state)
2154 continue;
2155
2156 ret = cpuhp_issue_call(cpu, state, true, NULL);
2157 if (ret) {
2158 if (teardown)
2159 cpuhp_rollback_install(cpu, state, NULL);
2160 cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
2161 goto out;
2162 }
2163 }
2164 out:
2165 mutex_unlock(&cpuhp_state_mutex);
2166 /*
2167 * If the requested state is CPUHP_AP_ONLINE_DYN, return the
2168 * dynamically allocated state in case of success.
2169 */
2170 if (!ret && dynstate)
2171 return state;
2172 return ret;
2173 }
2174 EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
2175
2176 int __cpuhp_setup_state(enum cpuhp_state state,
2177 const char *name, bool invoke,
2178 int (*startup)(unsigned int cpu),
2179 int (*teardown)(unsigned int cpu),
2180 bool multi_instance)
2181 {
2182 int ret;
2183
2184 cpus_read_lock();
2185 ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
2186 teardown, multi_instance);
2187 cpus_read_unlock();
2188 return ret;
2189 }
2190 EXPORT_SYMBOL(__cpuhp_setup_state);
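
/*
 * Hedged usage sketch (hypothetical driver; the example_* names are made up
 * and nothing below is used by this file): callers normally go through the
 * cpuhp_setup_state() wrapper from <linux/cpuhotplug.h>, which ends up in
 * __cpuhp_setup_state() above. With CPUHP_AP_ONLINE_DYN the positive return
 * value is the dynamically reserved state and has to be kept for a later
 * cpuhp_remove_state().
 */
static enum cpuhp_state example_hp_state;

static int example_cpu_online(unsigned int cpu)
{
	/* per-CPU setup; may fail, which triggers rollback */
	return 0;
}

static int example_cpu_offline(unsigned int cpu)
{
	/* per-CPU cleanup; expected not to fail */
	return 0;
}

static int __maybe_unused example_driver_init(void)
{
	int ret;

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "example:online",
				example_cpu_online, example_cpu_offline);
	if (ret < 0)
		return ret;

	example_hp_state = ret;
	return 0;
}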
2191
2192 int __cpuhp_state_remove_instance(enum cpuhp_state state,
2193 struct hlist_node *node, bool invoke)
2194 {
2195 struct cpuhp_step *sp = cpuhp_get_step(state);
2196 int cpu;
2197
2198 BUG_ON(cpuhp_cb_check(state));
2199
2200 if (!sp->multi_instance)
2201 return -EINVAL;
2202
2203 cpus_read_lock();
2204 mutex_lock(&cpuhp_state_mutex);
2205
2206 if (!invoke || !cpuhp_get_teardown_cb(state))
2207 goto remove;
2208 /*
2209 * Call the teardown callback for each present cpu depending
2210 * on the hotplug state of the cpu. This function is not
2211 * allowed to fail currently!
2212 */
2213 for_each_present_cpu(cpu) {
2214 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2215 int cpustate = st->state;
2216
2217 if (cpustate >= state)
2218 cpuhp_issue_call(cpu, state, false, node);
2219 }
2220
2221 remove:
2222 hlist_del(node);
2223 mutex_unlock(&cpuhp_state_mutex);
2224 cpus_read_unlock();
2225
2226 return 0;
2227 }
2228 EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
2229
2230 /**
2231  * __cpuhp_remove_state_cpuslocked - Remove the callbacks for a hotplug machine state
2232 * @state: The state to remove
2233 * @invoke: If true, the teardown function is invoked for cpus where
2234 * cpu state >= @state
2235 *
2236 * The caller needs to hold cpus read locked while calling this function.
2237 * The teardown callback is currently not allowed to fail. Think
2238 * about module removal!
2239 */
2240 void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
2241 {
2242 struct cpuhp_step *sp = cpuhp_get_step(state);
2243 int cpu;
2244
2245 BUG_ON(cpuhp_cb_check(state));
2246
2247 lockdep_assert_cpus_held();
2248
2249 mutex_lock(&cpuhp_state_mutex);
2250 if (sp->multi_instance) {
2251 WARN(!hlist_empty(&sp->list),
2252 "Error: Removing state %d which has instances left.\n",
2253 state);
2254 goto remove;
2255 }
2256
2257 if (!invoke || !cpuhp_get_teardown_cb(state))
2258 goto remove;
2259
2260 /*
2261 * Call the teardown callback for each present cpu depending
2262 * on the hotplug state of the cpu. This function is not
2263 * allowed to fail currently!
2264 */
2265 for_each_present_cpu(cpu) {
2266 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2267 int cpustate = st->state;
2268
2269 if (cpustate >= state)
2270 cpuhp_issue_call(cpu, state, false, NULL);
2271 }
2272 remove:
2273 cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
2274 mutex_unlock(&cpuhp_state_mutex);
2275 }
2276 EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
2277
2278 void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
2279 {
2280 cpus_read_lock();
2281 __cpuhp_remove_state_cpuslocked(state, invoke);
2282 cpus_read_unlock();
2283 }
2284 EXPORT_SYMBOL(__cpuhp_remove_state);
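
/*
 * Hedged counterpart to the sketch after __cpuhp_setup_state() above
 * (hypothetical, the example_* names are made up): on driver/module exit the
 * dynamically allocated state is handed back with cpuhp_remove_state(), which
 * also invokes the teardown callback on the online CPUs.
 */
static void __maybe_unused example_driver_exit(void)
{
	cpuhp_remove_state(example_hp_state);
}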
2285
2286 #ifdef CONFIG_HOTPLUG_SMT
2287 static void cpuhp_offline_cpu_device(unsigned int cpu)
2288 {
2289 struct device *dev = get_cpu_device(cpu);
2290
2291 dev->offline = true;
2292 /* Tell user space about the state change */
2293 kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
2294 }
2295
2296 static void cpuhp_online_cpu_device(unsigned int cpu)
2297 {
2298 struct device *dev = get_cpu_device(cpu);
2299
2300 dev->offline = false;
2301 /* Tell user space about the state change */
2302 kobject_uevent(&dev->kobj, KOBJ_ONLINE);
2303 }
2304
2305 int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
2306 {
2307 int cpu, ret = 0;
2308
2309 cpu_maps_update_begin();
2310 for_each_online_cpu(cpu) {
2311 if (topology_is_primary_thread(cpu))
2312 continue;
2313 /*
2314 * Disable can be called with CPU_SMT_ENABLED when changing
2315 * from a higher to lower number of SMT threads per core.
2316 */
2317 if (ctrlval == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu))
2318 continue;
2319 ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
2320 if (ret)
2321 break;
2322 /*
2323 * As this needs to hold the cpu maps lock it's impossible
2324 * to call device_offline() because that ends up calling
2325		 * cpu_down(), which takes the cpu maps lock. That lock needs
2326		 * to be held as this might race against in-kernel abusers
2327		 * of the hotplug machinery (thermal management).
2328 *
2329 * So nothing would update device:offline state. That would
2330 * leave the sysfs entry stale and prevent onlining after
2331 * smt control has been changed to 'off' again. This is
2332 * called under the sysfs hotplug lock, so it is properly
2333 * serialized against the regular offline usage.
2334 */
2335 cpuhp_offline_cpu_device(cpu);
2336 }
2337 if (!ret)
2338 cpu_smt_control = ctrlval;
2339 cpu_maps_update_done();
2340 return ret;
2341 }
2342
2343 int cpuhp_smt_enable(void)
2344 {
2345 int cpu, ret = 0;
2346
2347 cpu_maps_update_begin();
2348 cpu_smt_control = CPU_SMT_ENABLED;
2349 for_each_present_cpu(cpu) {
2350 /* Skip online CPUs and CPUs on offline nodes */
2351 if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
2352 continue;
2353 if (!cpu_smt_thread_allowed(cpu))
2354 continue;
2355 ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
2356 if (ret)
2357 break;
2358 /* See comment in cpuhp_smt_disable() */
2359 cpuhp_online_cpu_device(cpu);
2360 }
2361 cpu_maps_update_done();
2362 return ret;
2363 }
2364 #endif
2365
2366 #if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
2367 static ssize_t state_show(struct device *dev,
2368 struct device_attribute *attr, char *buf)
2369 {
2370 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2371
2372 return sprintf(buf, "%d\n", st->state);
2373 }
2374 static DEVICE_ATTR_RO(state);
2375
2376 static ssize_t target_store(struct device *dev, struct device_attribute *attr,
2377 const char *buf, size_t count)
2378 {
2379 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2380 struct cpuhp_step *sp;
2381 int target, ret;
2382
2383 ret = kstrtoint(buf, 10, &target);
2384 if (ret)
2385 return ret;
2386
2387 #ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
2388 if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
2389 return -EINVAL;
2390 #else
2391 if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
2392 return -EINVAL;
2393 #endif
2394
2395 ret = lock_device_hotplug_sysfs();
2396 if (ret)
2397 return ret;
2398
2399 mutex_lock(&cpuhp_state_mutex);
2400 sp = cpuhp_get_step(target);
2401 ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
2402 mutex_unlock(&cpuhp_state_mutex);
2403 if (ret)
2404 goto out;
2405
2406 if (st->state < target)
2407 ret = cpu_up(dev->id, target);
2408 else if (st->state > target)
2409 ret = cpu_down(dev->id, target);
2410 else if (WARN_ON(st->target != target))
2411 st->target = target;
2412 out:
2413 unlock_device_hotplug();
2414 return ret ? ret : count;
2415 }
2416
2417 static ssize_t target_show(struct device *dev,
2418 struct device_attribute *attr, char *buf)
2419 {
2420 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2421
2422 return sprintf(buf, "%d\n", st->target);
2423 }
2424 static DEVICE_ATTR_RW(target);
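
/*
 * Hedged usage note: per CPU these attributes show up as
 * /sys/devices/system/cpu/cpuN/hotplug/state and .../target. Writing a state
 * number to "target" drives the CPU towards that state, e.g.
 *	echo 0 > /sys/devices/system/cpu/cpu4/hotplug/target
 * is equivalent to offlining CPU 4 (CPUHP_OFFLINE is 0). Intermediate targets
 * are only accepted with CONFIG_CPU_HOTPLUG_STATE_CONTROL enabled; the state
 * numbers can be taken from the "states" listing below.
 */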
2425
2426 static ssize_t fail_store(struct device *dev, struct device_attribute *attr,
2427 const char *buf, size_t count)
2428 {
2429 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2430 struct cpuhp_step *sp;
2431 int fail, ret;
2432
2433 ret = kstrtoint(buf, 10, &fail);
2434 if (ret)
2435 return ret;
2436
2437 if (fail == CPUHP_INVALID) {
2438 st->fail = fail;
2439 return count;
2440 }
2441
2442 if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
2443 return -EINVAL;
2444
2445 /*
2446 * Cannot fail STARTING/DYING callbacks.
2447 */
2448 if (cpuhp_is_atomic_state(fail))
2449 return -EINVAL;
2450
2451 /*
2452 * DEAD callbacks cannot fail...
2453	 * ... neither can CPUHP_BRINGUP_CPU during hotunplug. Since the
2454	 * latter triggers the STARTING callbacks, a failure in this state
2455	 * would hinder rollback.
2456 */
2457 if (fail <= CPUHP_BRINGUP_CPU && st->state > CPUHP_BRINGUP_CPU)
2458 return -EINVAL;
2459
2460 /*
2461 * Cannot fail anything that doesn't have callbacks.
2462 */
2463 mutex_lock(&cpuhp_state_mutex);
2464 sp = cpuhp_get_step(fail);
2465 if (!sp->startup.single && !sp->teardown.single)
2466 ret = -EINVAL;
2467 mutex_unlock(&cpuhp_state_mutex);
2468 if (ret)
2469 return ret;
2470
2471 st->fail = fail;
2472
2473 return count;
2474 }
2475
2476 static ssize_t fail_show(struct device *dev,
2477 struct device_attribute *attr, char *buf)
2478 {
2479 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2480
2481 return sprintf(buf, "%d\n", st->fail);
2482 }
2483
2484 static DEVICE_ATTR_RW(fail);
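
/*
 * Hedged usage note for the "fail" attribute above: it sits next to "state"
 * and "target" under /sys/devices/system/cpu/cpuN/hotplug/. Writing a state
 * number arms a forced -EAGAIN for that state's callback during the next
 * hotplug operation on the CPU, which exercises the rollback paths; writing
 * -1 (CPUHP_INVALID) disarms it again. The concrete state number would be
 * taken from the "states" listing.
 */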
2485
2486 static struct attribute *cpuhp_cpu_attrs[] = {
2487 &dev_attr_state.attr,
2488 &dev_attr_target.attr,
2489 &dev_attr_fail.attr,
2490 NULL
2491 };
2492
2493 static const struct attribute_group cpuhp_cpu_attr_group = {
2494 .attrs = cpuhp_cpu_attrs,
2495 .name = "hotplug",
2497 };
2498
2499 static ssize_t states_show(struct device *dev,
2500 struct device_attribute *attr, char *buf)
2501 {
2502 ssize_t cur, res = 0;
2503 int i;
2504
2505 mutex_lock(&cpuhp_state_mutex);
2506 for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
2507 struct cpuhp_step *sp = cpuhp_get_step(i);
2508
2509 if (sp->name) {
2510 cur = sprintf(buf, "%3d: %s\n", i, sp->name);
2511 buf += cur;
2512 res += cur;
2513 }
2514 }
2515 mutex_unlock(&cpuhp_state_mutex);
2516 return res;
2517 }
2518 static DEVICE_ATTR_RO(states);
2519
2520 static struct attribute *cpuhp_cpu_root_attrs[] = {
2521 &dev_attr_states.attr,
2522 NULL
2523 };
2524
2525 static const struct attribute_group cpuhp_cpu_root_attr_group = {
2526 .attrs = cpuhp_cpu_root_attrs,
2527 .name = "hotplug",
2529 };
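
/*
 * Hedged usage note: the root group above is attached to the cpu subsystem,
 * so the full ordered listing of hotplug states can be read with e.g.
 *	cat /sys/devices/system/cpu/hotplug/states
 */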
2530
2531 #ifdef CONFIG_HOTPLUG_SMT
2532
2533 static ssize_t
2534 __store_smt_control(struct device *dev, struct device_attribute *attr,
2535 const char *buf, size_t count)
2536 {
2537 int ctrlval, ret;
2538
2539 if (sysfs_streq(buf, "on"))
2540 ctrlval = CPU_SMT_ENABLED;
2541 else if (sysfs_streq(buf, "off"))
2542 ctrlval = CPU_SMT_DISABLED;
2543 else if (sysfs_streq(buf, "forceoff"))
2544 ctrlval = CPU_SMT_FORCE_DISABLED;
2545 else
2546 return -EINVAL;
2547
2548 if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
2549 return -EPERM;
2550
2551 if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
2552 return -ENODEV;
2553
2554 ret = lock_device_hotplug_sysfs();
2555 if (ret)
2556 return ret;
2557
2558 if (ctrlval != cpu_smt_control) {
2559 switch (ctrlval) {
2560 case CPU_SMT_ENABLED:
2561 ret = cpuhp_smt_enable();
2562 break;
2563 case CPU_SMT_DISABLED:
2564 case CPU_SMT_FORCE_DISABLED:
2565 ret = cpuhp_smt_disable(ctrlval);
2566 break;
2567 }
2568 }
2569
2570 unlock_device_hotplug();
2571 return ret ? ret : count;
2572 }
2573
2574 #else /* !CONFIG_HOTPLUG_SMT */
2575 static ssize_t
2576 __store_smt_control(struct device *dev, struct device_attribute *attr,
2577 const char *buf, size_t count)
2578 {
2579 return -ENODEV;
2580 }
2581 #endif /* CONFIG_HOTPLUG_SMT */
2582
2583 static const char *smt_states[] = {
2584 [CPU_SMT_ENABLED] = "on",
2585 [CPU_SMT_DISABLED] = "off",
2586 [CPU_SMT_FORCE_DISABLED] = "forceoff",
2587 [CPU_SMT_NOT_SUPPORTED] = "notsupported",
2588 [CPU_SMT_NOT_IMPLEMENTED] = "notimplemented",
2589 };
2590
2591 static ssize_t control_show(struct device *dev,
2592 struct device_attribute *attr, char *buf)
2593 {
2594 const char *state = smt_states[cpu_smt_control];
2595
2596 return snprintf(buf, PAGE_SIZE - 2, "%s\n", state);
2597 }
2598
2599 static ssize_t control_store(struct device *dev, struct device_attribute *attr,
2600 const char *buf, size_t count)
2601 {
2602 return __store_smt_control(dev, attr, buf, count);
2603 }
2604 static DEVICE_ATTR_RW(control);
2605
2606 static ssize_t active_show(struct device *dev,
2607 struct device_attribute *attr, char *buf)
2608 {
2609 return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active());
2610 }
2611 static DEVICE_ATTR_RO(active);
2612
2613 static struct attribute *cpuhp_smt_attrs[] = {
2614 &dev_attr_control.attr,
2615 &dev_attr_active.attr,
2616 NULL
2617 };
2618
2619 static const struct attribute_group cpuhp_smt_attr_group = {
2620 .attrs = cpuhp_smt_attrs,
2621 .name = "smt",
2623 };
2624
2625 static int __init cpu_smt_sysfs_init(void)
2626 {
2627 return sysfs_create_group(&cpu_subsys.dev_root->kobj,
2628 &cpuhp_smt_attr_group);
2629 }
2630
2631 static int __init cpuhp_sysfs_init(void)
2632 {
2633 int cpu, ret;
2634
2635 ret = cpu_smt_sysfs_init();
2636 if (ret)
2637 return ret;
2638
2639 ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
2640 &cpuhp_cpu_root_attr_group);
2641 if (ret)
2642 return ret;
2643
2644 for_each_possible_cpu(cpu) {
2645 struct device *dev = get_cpu_device(cpu);
2646
2647 if (!dev)
2648 continue;
2649 ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
2650 if (ret)
2651 return ret;
2652 }
2653 return 0;
2654 }
2655 device_initcall(cpuhp_sysfs_init);
2656 #endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */
2657
2658 /*
2659 * cpu_bit_bitmap[] is a special, "compressed" data structure that
2660  * represents, for each value of nr, the NR_CPUS-bit binary value 1 << nr.
2661  *
2662  * It is used by cpumask_of() to get a constant address of a CPU
2663  * mask value that has only a single bit set.
2664 */
2665
2666 /* cpu_bit_bitmap[0] is empty - so we can back into it */
2667 #define MASK_DECLARE_1(x) [x+1][0] = (1UL << (x))
2668 #define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
2669 #define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
2670 #define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
2671
2672 const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
2673
2674 MASK_DECLARE_8(0), MASK_DECLARE_8(8),
2675 MASK_DECLARE_8(16), MASK_DECLARE_8(24),
2676 #if BITS_PER_LONG > 32
2677 MASK_DECLARE_8(32), MASK_DECLARE_8(40),
2678 MASK_DECLARE_8(48), MASK_DECLARE_8(56),
2679 #endif
2680 };
2681 EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
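
/*
 * Hedged illustration of how the table above is consumed: this mirrors
 * get_cpu_mask()/cpumask_of() from <linux/cpumask.h> and is only a sketch,
 * nothing in this file uses it.
 */
static inline const struct cpumask *example_cpumask_of(unsigned int cpu)
{
	/* row 1 + (cpu % BITS_PER_LONG) has exactly that bit set in word 0 */
	const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];

	/* back up so the set bit lands in word cpu / BITS_PER_LONG */
	p -= cpu / BITS_PER_LONG;
	return to_cpumask(p);
}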
2682
2683 const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
2684 EXPORT_SYMBOL(cpu_all_bits);
2685
2686 #ifdef CONFIG_INIT_ALL_POSSIBLE
2687 struct cpumask __cpu_possible_mask __read_mostly
2688 = {CPU_BITS_ALL};
2689 #else
2690 struct cpumask __cpu_possible_mask __read_mostly;
2691 #endif
2692 EXPORT_SYMBOL(__cpu_possible_mask);
2693
2694 struct cpumask __cpu_online_mask __read_mostly;
2695 EXPORT_SYMBOL(__cpu_online_mask);
2696
2697 struct cpumask __cpu_present_mask __read_mostly;
2698 EXPORT_SYMBOL(__cpu_present_mask);
2699
2700 struct cpumask __cpu_active_mask __read_mostly;
2701 EXPORT_SYMBOL(__cpu_active_mask);
2702
2703 struct cpumask __cpu_dying_mask __read_mostly;
2704 EXPORT_SYMBOL(__cpu_dying_mask);
2705
2706 atomic_t __num_online_cpus __read_mostly;
2707 EXPORT_SYMBOL(__num_online_cpus);
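
/*
 * Hedged note: __num_online_cpus is what backs num_online_cpus() in
 * <linux/cpumask.h>; it mirrors the weight of __cpu_online_mask and is
 * maintained in set_cpu_online() below.
 */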
2708
2709 void init_cpu_present(const struct cpumask *src)
2710 {
2711 cpumask_copy(&__cpu_present_mask, src);
2712 }
2713
2714 void init_cpu_possible(const struct cpumask *src)
2715 {
2716 cpumask_copy(&__cpu_possible_mask, src);
2717 }
2718
2719 void init_cpu_online(const struct cpumask *src)
2720 {
2721 cpumask_copy(&__cpu_online_mask, src);
2722 }
2723
2724 void set_cpu_online(unsigned int cpu, bool online)
2725 {
2726 /*
2727 * atomic_inc/dec() is required to handle the horrid abuse of this
2728 * function by the reboot and kexec code which invoke it from
2729 * IPI/NMI broadcasts when shutting down CPUs. Invocation from
2730 * regular CPU hotplug is properly serialized.
2731 *
2732	 * Note that the fact that __num_online_cpus is of type atomic_t
2733 * does not protect readers which are not serialized against
2734 * concurrent hotplug operations.
2735 */
2736 if (online) {
2737 if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask))
2738 atomic_inc(&__num_online_cpus);
2739 } else {
2740 if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask))
2741 atomic_dec(&__num_online_cpus);
2742 }
2743 }
2744
2745 /*
2746 * Activate the first processor.
2747 */
2748 void __init boot_cpu_init(void)
2749 {
2750 int cpu = smp_processor_id();
2751
2752	/* Mark the boot cpu "present", "online" etc. for the SMP and UP case */
2753 set_cpu_online(cpu, true);
2754 set_cpu_active(cpu, true);
2755 set_cpu_present(cpu, true);
2756 set_cpu_possible(cpu, true);
2757
2758 #ifdef CONFIG_SMP
2759 __boot_cpu_id = cpu;
2760 #endif
2761 }
2762
2763 /*
2764 * Must be called _AFTER_ setting up the per_cpu areas
2765 */
2766 void __init boot_cpu_hotplug_init(void)
2767 {
2768 #ifdef CONFIG_SMP
2769 cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
2770 #endif
2771 this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
2772 }
2773
2774 /*
2775 * These are used for a global "mitigations=" cmdline option for toggling
2776 * optional CPU mitigations.
2777 */
2778 enum cpu_mitigations {
2779 CPU_MITIGATIONS_OFF,
2780 CPU_MITIGATIONS_AUTO,
2781 CPU_MITIGATIONS_AUTO_NOSMT,
2782 };
2783
2784 static enum cpu_mitigations cpu_mitigations __ro_after_init =
2785 CPU_MITIGATIONS_AUTO;
2786
2787 static int __init mitigations_parse_cmdline(char *arg)
2788 {
2789 if (!strcmp(arg, "off"))
2790 cpu_mitigations = CPU_MITIGATIONS_OFF;
2791 else if (!strcmp(arg, "auto"))
2792 cpu_mitigations = CPU_MITIGATIONS_AUTO;
2793 else if (!strcmp(arg, "auto,nosmt"))
2794 cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
2795 else
2796 pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n",
2797 arg);
2798
2799 return 0;
2800 }
2801 early_param("mitigations", mitigations_parse_cmdline);
2802
2803 /* mitigations=off */
2804 bool cpu_mitigations_off(void)
2805 {
2806 return cpu_mitigations == CPU_MITIGATIONS_OFF;
2807 }
2808 EXPORT_SYMBOL_GPL(cpu_mitigations_off);
2809
2810 /* mitigations=auto,nosmt */
2811 bool cpu_mitigations_auto_nosmt(void)
2812 {
2813 return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
2814 }
2815 EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
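
/*
 * Hedged illustration (hypothetical arch code; the example_* name is made
 * up): the two helpers above are typically consulted when an architecture
 * picks its mitigation defaults, roughly along these lines.
 */
static bool __init __maybe_unused example_mitigation_wanted(void)
{
	if (cpu_mitigations_off())
		return false;			/* "mitigations=off" */
	if (cpu_mitigations_auto_nosmt())
		pr_info("example: mitigation also wants SMT disabled\n");
	return true;				/* "auto" and "auto,nosmt" */
}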
2816