1 /* CPU control.
2  * (C) 2001, 2002, 2003, 2004 Rusty Russell
3  *
4  * This code is licenced under the GPL.
5  */
6 #include <linux/sched/mm.h>
7 #include <linux/proc_fs.h>
8 #include <linux/smp.h>
9 #include <linux/init.h>
10 #include <linux/notifier.h>
11 #include <linux/sched/signal.h>
12 #include <linux/sched/hotplug.h>
13 #include <linux/sched/isolation.h>
14 #include <linux/sched/task.h>
15 #include <linux/sched/smt.h>
16 #include <linux/unistd.h>
17 #include <linux/cpu.h>
18 #include <linux/oom.h>
19 #include <linux/rcupdate.h>
20 #include <linux/export.h>
21 #include <linux/bug.h>
22 #include <linux/kthread.h>
23 #include <linux/stop_machine.h>
24 #include <linux/mutex.h>
25 #include <linux/gfp.h>
26 #include <linux/suspend.h>
27 #include <linux/lockdep.h>
28 #include <linux/tick.h>
29 #include <linux/irq.h>
30 #include <linux/nmi.h>
31 #include <linux/smpboot.h>
32 #include <linux/relay.h>
33 #include <linux/slab.h>
34 #include <linux/scs.h>
35 #include <linux/percpu-rwsem.h>
36 #include <linux/cpuset.h>
37 #include <linux/random.h>
38 
39 #include <trace/events/power.h>
40 #define CREATE_TRACE_POINTS
41 #include <trace/events/cpuhp.h>
42 
43 #undef CREATE_TRACE_POINTS
44 
45 #include "smpboot.h"
46 
47 #define CPU_PAGE_SIZE_OFF_TWO 2
48 
49 /**
50  * struct cpuhp_cpu_state - Per cpu hotplug state storage
51  * @state:    The current cpu state
52  * @target:    The target state
53  * @thread:    Pointer to the hotplug thread
54  * @should_run:    Thread should execute
55  * @rollback:    Perform a rollback
56  * @single:    Single callback invocation
57  * @bringup:    Single callback bringup or teardown selector
58  * @cb_state:    The state for a single callback (install/uninstall)
59  * @result:    Result of the operation
60  * @done_up:    Signal completion to the issuer of the task for cpu-up
61  * @done_down:    Signal completion to the issuer of the task for cpu-down
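 * @fail:    State whose callback is forced to fail (fault injection); CPUHP_INVALID means none
 * @node:    For multi-instance states, the instance to do a single entry callback for install/remove
 * @last:    For multi-instance rollback, remember how far we got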
62  */
63 struct cpuhp_cpu_state {
64     enum cpuhp_state state;
65     enum cpuhp_state target;
66     enum cpuhp_state fail;
67 #ifdef CONFIG_SMP
68     struct task_struct *thread;
69     bool should_run;
70     bool rollback;
71     bool single;
72     bool bringup;
73     struct hlist_node *node;
74     struct hlist_node *last;
75     enum cpuhp_state cb_state;
76     int result;
77     struct completion done_up;
78     struct completion done_down;
79 #endif
80 };
81 
82 static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
83     .fail = CPUHP_INVALID,
84 };
85 
86 #ifdef CONFIG_SMP
87 cpumask_t cpus_booted_once_mask;
88 #endif
89 
90 #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
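/*
 * Lockdep-only pseudo lock maps for the hotplug state machine. Taking the
 * "up" or "down" map around state callbacks lets lockdep report lock
 * ordering problems against the AP hotplug thread.
 */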
91 static struct lockdep_map cpuhp_state_up_map = STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
92 static struct lockdep_map cpuhp_state_down_map = STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
93 
94 static inline void cpuhp_lock_acquire(bool bringup)
95 {
96     lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
97 }
98 
99 static inline void cpuhp_lock_release(bool bringup)
100 {
101     lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
102 }
103 #else
104 
105 static inline void cpuhp_lock_acquire(bool bringup)
106 {
107 }
108 static inline void cpuhp_lock_release(bool bringup)
109 {
110 }
111 
112 #endif
113 
114 /**
115  * struct cpuhp_step - Hotplug state machine step
116  * @name:    Name of the step
117  * @startup:    Startup function of the step
118  * @teardown:    Teardown function of the step
119  * @cant_stop:    Bringup/teardown can't be stopped at this step
120  */
121 struct cpuhp_step {
122     const char *name;
123     union {
124         int (*single)(unsigned int cpu);
125         int (*multi)(unsigned int cpu, struct hlist_node *node);
126     } startup;
127     union {
128         int (*single)(unsigned int cpu);
129         int (*multi)(unsigned int cpu, struct hlist_node *node);
130     } teardown;
131     struct hlist_head list;
132     bool cant_stop;
133     bool multi_instance;
134 };
135 
136 static DEFINE_MUTEX(cpuhp_state_mutex);
137 static struct cpuhp_step cpuhp_hp_states[];
138 
139 static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
140 {
141     return cpuhp_hp_states + state;
142 }
143 
144 /**
145  * cpuhp_invoke_callback - Invoke the callbacks for a given state
146  * @cpu:    The cpu for which the callback should be invoked
147  * @state:    The state to do callbacks for
148  * @bringup:    True if the bringup callback should be invoked
149  * @node:    For multi-instance, do a single entry callback for install/remove
150  * @lastp:    For multi-instance rollback, remember how far we got
151  *
152  * Called from cpu hotplug and from the state register machinery.
153  */
154 static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state, bool bringup, struct hlist_node *node,
155                                  struct hlist_node **lastp)
156 {
157     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
158     struct cpuhp_step *step = cpuhp_get_step(state);
159     int (*cbm)(unsigned int cpu, struct hlist_node *node);
160     int (*cb)(unsigned int cpu);
161     int ret, cnt;
162 
163     if (st->fail == state) {
164         st->fail = CPUHP_INVALID;
165 
166         if (!(bringup ? step->startup.single : step->teardown.single)) {
167             return 0;
168         }
169 
170         return -EAGAIN;
171     }
172 
173     if (!step->multi_instance) {
174         WARN_ON_ONCE(lastp && *lastp);
175         cb = bringup ? step->startup.single : step->teardown.single;
176         if (!cb) {
177             return 0;
178         }
179         trace_cpuhp_enter(cpu, st->target, state, cb);
180         ret = cb(cpu);
181         trace_cpuhp_exit(cpu, st->state, state, ret);
182         return ret;
183     }
184     cbm = bringup ? step->startup.multi : step->teardown.multi;
185     if (!cbm) {
186         return 0;
187     }
188 
189     /* Single invocation for instance add/remove */
190     if (node) {
191         WARN_ON_ONCE(lastp && *lastp);
192         trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
193         ret = cbm(cpu, node);
194         trace_cpuhp_exit(cpu, st->state, state, ret);
195         return ret;
196     }
197 
198     /* State transition. Invoke on all instances */
199     cnt = 0;
200     hlist_for_each(node, &step->list)
201     {
202         if (lastp && node == *lastp) {
203             break;
204         }
205 
206         trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
207         ret = cbm(cpu, node);
208         trace_cpuhp_exit(cpu, st->state, state, ret);
209         if (ret) {
210             if (!lastp) {
211                 goto err;
212             }
213 
214             *lastp = node;
215             return ret;
216         }
217         cnt++;
218     }
219     if (lastp) {
220         *lastp = NULL;
221     }
222     return 0;
223 err:
224     /* Rollback the instances if one failed */
225     cbm = !bringup ? step->startup.multi : step->teardown.multi;
226     if (!cbm) {
227         return ret;
228     }
229 
230     hlist_for_each(node, &step->list)
231     {
232         if (!cnt--) {
233             break;
234         }
235 
236         trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
237         ret = cbm(cpu, node);
238         trace_cpuhp_exit(cpu, st->state, state, ret);
239         /*
240          * Rollback must not fail!
241          */
242         WARN_ON_ONCE(ret);
243     }
244     return ret;
245 }
246 
247 #ifdef CONFIG_SMP
248 static bool cpuhp_is_ap_state(enum cpuhp_state state)
249 {
250     /*
251      * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
252      * purposes as that state is handled explicitly in cpu_down.
253      */
254     return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
255 }
256 
257 static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
258 {
259     struct completion *done = bringup ? &st->done_up : &st->done_down;
260     wait_for_completion(done);
261 }
262 
263 static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
264 {
265     struct completion *done = bringup ? &st->done_up : &st->done_down;
266     complete(done);
267 }
268 
269 /*
270  * The former STARTING/DYING states are run with IRQs disabled and must not fail.
271  */
272 static bool cpuhp_is_atomic_state(enum cpuhp_state state)
273 {
274     return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
275 }
276 
277 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
278 static DEFINE_MUTEX(cpu_add_remove_lock);
279 bool cpuhp_tasks_frozen;
280 EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
281 
282 /*
283  * The following two APIs (cpu_maps_update_begin/done) must be used when
284  * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
285  */
286 void cpu_maps_update_begin(void)
287 {
288     mutex_lock(&cpu_add_remove_lock);
289 }
290 EXPORT_SYMBOL_GPL(cpu_maps_update_begin);
291 
292 void cpu_maps_update_done(void)
293 {
294     mutex_unlock(&cpu_add_remove_lock);
295 }
296 EXPORT_SYMBOL_GPL(cpu_maps_update_done);
297 
298 /*
299  * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
300  * Should always be manipulated under cpu_add_remove_lock
301  */
302 static int cpu_hotplug_disabled;
303 
304 #ifdef CONFIG_HOTPLUG_CPU
305 
306 DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
307 
308 void cpus_read_lock(void)
309 {
310     percpu_down_read(&cpu_hotplug_lock);
311 }
312 EXPORT_SYMBOL_GPL(cpus_read_lock);
313 
314 int cpus_read_trylock(void)
315 {
316     return percpu_down_read_trylock(&cpu_hotplug_lock);
317 }
318 EXPORT_SYMBOL_GPL(cpus_read_trylock);
319 
320 void cpus_read_unlock(void)
321 {
322     percpu_up_read(&cpu_hotplug_lock);
323 }
324 EXPORT_SYMBOL_GPL(cpus_read_unlock);
325 
326 void cpus_write_lock(void)
327 {
328     percpu_down_write(&cpu_hotplug_lock);
329 }
330 
331 void cpus_write_unlock(void)
332 {
333     percpu_up_write(&cpu_hotplug_lock);
334 }
335 
336 void lockdep_assert_cpus_held(void)
337 {
338     /*
339      * We can't have hotplug operations before userspace starts running,
340      * and some init codepaths will knowingly not take the hotplug lock.
341      * This is all valid, so mute lockdep until it makes sense to report
342      * unheld locks.
343      */
344     if (system_state < SYSTEM_RUNNING) {
345         return;
346     }
347 
348     percpu_rwsem_assert_held(&cpu_hotplug_lock);
349 }
350 
351 static void lockdep_acquire_cpus_lock(void)
352 {
353     rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
354 }
355 
356 static void lockdep_release_cpus_lock(void)
357 {
358     rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
359 }
360 
361 /*
362  * Wait for currently running CPU hotplug operations to complete (if any) and
363  * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
364  * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
365  * hotplug path before performing hotplug operations. So acquiring that lock
366  * guarantees mutual exclusion from any currently running hotplug operations.
367  */
368 void cpu_hotplug_disable(void)
369 {
370     cpu_maps_update_begin();
371     cpu_hotplug_disabled++;
372     cpu_maps_update_done();
373 }
374 EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
375 
376 static void _cpu_hotplug_enable(void)
377 {
378     if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n")) {
379         return;
380     }
381     cpu_hotplug_disabled--;
382 }
383 
384 void cpu_hotplug_enable(void)
385 {
386     cpu_maps_update_begin();
387     _cpu_hotplug_enable();
388     cpu_maps_update_done();
389 }
390 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
391 
392 #else
393 
394 static void lockdep_acquire_cpus_lock(void)
395 {
396 }
397 
398 static void lockdep_release_cpus_lock(void)
399 {
400 }
401 
402 #endif /* CONFIG_HOTPLUG_CPU */
403 
404 /*
405  * Architectures that need SMT-specific errata handling during SMT hotplug
406  * should override this.
407  */
408 void __weak arch_smt_update(void)
409 {
410 }
411 
412 #ifdef CONFIG_HOTPLUG_SMT
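/* SMT defaults to enabled; "nosmt" on the command line or the topology check below may change this. */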
413 enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
414 
415 void __init cpu_smt_disable(bool force)
416 {
417     if (!cpu_smt_possible()) {
418         return;
419     }
420 
421     if (force) {
422         pr_info("SMT: Force disabled\n");
423         cpu_smt_control = CPU_SMT_FORCE_DISABLED;
424     } else {
425         pr_info("SMT: disabled\n");
426         cpu_smt_control = CPU_SMT_DISABLED;
427     }
428 }
429 
430 /*
431  * The decision whether SMT is supported can only be done after the full
432  * CPU identification. Called from architecture code.
433  */
434 void __init cpu_smt_check_topology(void)
435 {
436     if (!topology_smt_supported()) {
437         cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
438     }
439 }
440 
441 static int __init smt_cmdline_disable(char *str)
442 {
443     cpu_smt_disable(str && !strcmp(str, "force"));
444     return 0;
445 }
446 early_param("nosmt", smt_cmdline_disable);
447 
448 static inline bool cpu_smt_allowed(unsigned int cpu)
449 {
450     if (cpu_smt_control == CPU_SMT_ENABLED) {
451         return true;
452     }
453 
454     if (topology_is_primary_thread(cpu)) {
455         return true;
456     }
457 
458     /*
459      * On x86 it's required to boot all logical CPUs at least once so
460      * that the init code can get a chance to set CR4.MCE on each
461      * CPU. Otherwise, a broadcast MCE observing CR4.MCE=0b on any
462      * core will shut down the machine.
463      */
464     return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
465 }
466 
467 /* Returns true unless SMT is unsupported or forcefully (irreversibly) disabled */
468 bool cpu_smt_possible(void)
469 {
470     return cpu_smt_control != CPU_SMT_FORCE_DISABLED && cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
471 }
472 EXPORT_SYMBOL_GPL(cpu_smt_possible);
473 #else
474 static inline bool cpu_smt_allowed(unsigned int cpu)
475 {
476     return true;
477 }
478 #endif
479 
480 static inline enum cpuhp_state cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
481 {
482     enum cpuhp_state prev_state = st->state;
483 
484     st->rollback = false;
485     st->last = NULL;
486 
487     st->target = target;
488     st->single = false;
489     st->bringup = st->state < target;
490 
491     return prev_state;
492 }
493 
494 static inline void cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
495 {
496     st->rollback = true;
497 
498     /*
499      * If we have st->last we need to undo partial multi_instance of this
500      * state first. Otherwise start undo at the previous state.
501      */
502     if (!st->last) {
503         if (st->bringup) {
504             st->state--;
505         } else {
506             st->state++;
507         }
508     }
509 
510     st->target = prev_state;
511     st->bringup = !st->bringup;
512 }
513 
514 /* Regular hotplug invocation of the AP hotplug thread */
515 static void _cpuhp_kick_ap(struct cpuhp_cpu_state *st)
516 {
517     if (!st->single && st->state == st->target) {
518         return;
519     }
520 
521     st->result = 0;
522     /*
523      * Make sure the above stores are visible before should_run becomes
524      * true. Paired with the mb() above in cpuhp_thread_fun()
525      */
526     smp_mb();
527     st->should_run = true;
528     wake_up_process(st->thread);
529     wait_for_ap_thread(st, st->bringup);
530 }
531 
532 static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
533 {
534     enum cpuhp_state prev_state;
535     int ret;
536 
537     prev_state = cpuhp_set_state(st, target);
538     _cpuhp_kick_ap(st);
539     if ((ret = st->result)) {
540         cpuhp_reset_state(st, prev_state);
541         _cpuhp_kick_ap(st);
542     }
543 
544     return ret;
545 }
546 
547 static int bringup_wait_for_ap(unsigned int cpu)
548 {
549     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
550 
551     /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
552     wait_for_ap_thread(st, true);
553     if (WARN_ON_ONCE(!cpu_online(cpu))) {
554         return -ECANCELED;
555     }
556 
557     /* Unpark the hotplug thread of the target cpu */
558     kthread_unpark(st->thread);
559 
560     /*
561      * SMT soft disabling on X86 requires to bring the CPU out of the
562      * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit.  The
563      * CPU marked itself as booted_once in notify_cpu_starting() so the
564      * cpu_smt_allowed() check will now return false if this is not the
565      * primary sibling.
566      */
567     if (!cpu_smt_allowed(cpu)) {
568         return -ECANCELED;
569     }
570 
571     if (st->target <= CPUHP_AP_ONLINE_IDLE) {
572         return 0;
573     }
574 
575     return cpuhp_kick_ap(st, st->target);
576 }
577 
578 static int bringup_cpu(unsigned int cpu)
579 {
580     struct task_struct *idle = idle_thread_get(cpu);
581     int ret;
582 
583     /*
584      * Reset stale stack state from the last time this CPU was online.
585      */
586     scs_task_reset(idle);
587     kasan_unpoison_task_stack(idle);
588 
589     /*
590      * Some architectures have to walk the irq descriptors to
591      * setup the vector space for the cpu which comes online.
592      * Prevent irq alloc/free across the bringup.
593      */
594     irq_lock_sparse();
595 
596     /* Arch-specific enabling code. */
597     ret = __cpu_up(cpu, idle);
598     irq_unlock_sparse();
599     if (ret) {
600         return ret;
601     }
602     return bringup_wait_for_ap(cpu);
603 }
604 
605 static int finish_cpu(unsigned int cpu)
606 {
607     struct task_struct *idle = idle_thread_get(cpu);
608     struct mm_struct *mm = idle->active_mm;
609 
610     /*
611      * idle_task_exit() will have switched to &init_mm, now
612      * clean up any remaining active_mm state.
613      */
614     if (mm != &init_mm) {
615         idle->active_mm = &init_mm;
616     }
617     mmdrop(mm);
618     return 0;
619 }
620 
621 /*
622  * Hotplug state machine related functions
623  */
624 
625 static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
626 {
627     for (st->state--; st->state > st->target; st->state--) {
628         cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
629     }
630 }
631 
632 static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
633 {
634     if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
635         return true;
636     }
637     /*
638      * When CPU hotplug is disabled, then taking the CPU down is not
639      * possible because takedown_cpu() and the architecture and
640      * subsystem specific mechanisms are not available. So the CPU
641      * which would be completely unplugged again needs to stay around
642      * in the current state.
643      */
644     return st->state <= CPUHP_BRINGUP_CPU;
645 }
646 
647 static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target)
648 {
649     enum cpuhp_state prev_state = st->state;
650     int ret = 0;
651 
652     while (st->state < target) {
653         st->state++;
654         ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
655         if (ret) {
656             if (can_rollback_cpu(st)) {
657                 st->target = prev_state;
658                 undo_cpu_up(cpu, st);
659             }
660             break;
661         }
662     }
663     return ret;
664 }
665 
666 /*
667  * The cpu hotplug threads manage the bringup and teardown of the cpus
668  */
669 static void cpuhp_create(unsigned int cpu)
670 {
671     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
672 
673     init_completion(&st->done_up);
674     init_completion(&st->done_down);
675 }
676 
677 static int cpuhp_should_run(unsigned int cpu)
678 {
679     struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
680 
681     return st->should_run;
682 }
683 
684 /*
685  * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
686  * callbacks when a state gets [un]installed at runtime.
687  *
688  * Each invocation of this function by the smpboot thread does a single AP
689  * state callback.
690  *
691  * It has 3 modes of operation:
692  *  - single: runs st->cb_state
693  *  - up:     runs ++st->state, while st->state < st->target
694  *  - down:   runs st->state--, while st->state > st->target
695  *
696  * When complete or on error, should_run is cleared and the completion is fired.
697  */
698 static void cpuhp_thread_fun(unsigned int cpu)
699 {
700     struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
701     bool bringup = st->bringup;
702     enum cpuhp_state state;
703 
704     if (WARN_ON_ONCE(!st->should_run)) {
705         return;
706     }
707 
708     /*
709      * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
710      * that if we see ->should_run we also see the rest of the state.
711      */
712     smp_mb();
713 
714     /*
715      * The BP holds the hotplug lock, but we're now running on the AP,
716      * ensure that anybody asserting the lock is held, will actually find
717      * it so.
718      */
719     lockdep_acquire_cpus_lock();
720     cpuhp_lock_acquire(bringup);
721 
722     if (st->single) {
723         state = st->cb_state;
724         st->should_run = false;
725     } else {
726         if (bringup) {
727             st->state++;
728             state = st->state;
729             st->should_run = (st->state < st->target);
730             WARN_ON_ONCE(st->state > st->target);
731         } else {
732             state = st->state;
733             st->state--;
734             st->should_run = (st->state > st->target);
735             WARN_ON_ONCE(st->state < st->target);
736         }
737     }
738 
739     WARN_ON_ONCE(!cpuhp_is_ap_state(state));
740 
741     if (cpuhp_is_atomic_state(state)) {
742         local_irq_disable();
743         st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
744         local_irq_enable();
745 
746         /*
747          * STARTING/DYING must not fail!
748          */
749         WARN_ON_ONCE(st->result);
750     } else {
751         st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
752     }
753 
754     if (st->result) {
755         /*
756          * If we fail on a rollback, we're up a creek without a
757          * paddle: no way forward, no way back. We lose, thanks for
758          * playing.
759          */
760         WARN_ON_ONCE(st->rollback);
761         st->should_run = false;
762     }
763 
764     cpuhp_lock_release(bringup);
765     lockdep_release_cpus_lock();
766 
767     if (!st->should_run) {
768         complete_ap_thread(st, bringup);
769     }
770 }
771 
772 /* Invoke a single callback on a remote cpu */
773 static int cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup, struct hlist_node *node)
774 {
775     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
776     int ret;
777 
778     if (!cpu_online(cpu)) {
779         return 0;
780     }
781 
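    /*
     * Dummy acquire/release of both lockdep state maps: any lock ordering
     * that could deadlock against the AP hotplug thread is reported here,
     * at the caller, by lockdep.
     */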
782     cpuhp_lock_acquire(false);
783     cpuhp_lock_release(false);
784 
785     cpuhp_lock_acquire(true);
786     cpuhp_lock_release(true);
787 
788     /*
789      * If we are up and running, use the hotplug thread. For early calls
790      * we invoke the thread function directly.
791      */
792     if (!st->thread) {
793         return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
794     }
795 
796     st->rollback = false;
797     st->last = NULL;
798 
799     st->node = node;
800     st->bringup = bringup;
801     st->cb_state = state;
802     st->single = true;
803 
804     _cpuhp_kick_ap(st);
805 
806     /*
807      * If we failed and did a partial, do a rollback.
808      */
809     if ((ret = st->result) && st->last) {
810         st->rollback = true;
811         st->bringup = !bringup;
812 
813         _cpuhp_kick_ap(st);
814     }
815 
816     /*
817      * Clean up the leftovers so the next hotplug operation won't use stale
818      * data.
819      */
820     st->node = st->last = NULL;
821     return ret;
822 }
823 
824 static int cpuhp_kick_ap_work(unsigned int cpu)
825 {
826     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
827     enum cpuhp_state prev_state = st->state;
828     int ret;
829 
830     cpuhp_lock_acquire(false);
831     cpuhp_lock_release(false);
832 
833     cpuhp_lock_acquire(true);
834     cpuhp_lock_release(true);
835 
836     trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
837     ret = cpuhp_kick_ap(st, st->target);
838     trace_cpuhp_exit(cpu, st->state, prev_state, ret);
839 
840     return ret;
841 }
842 
843 static struct smp_hotplug_thread cpuhp_threads = {
844     .store = &cpuhp_state.thread,
845     .create = &cpuhp_create,
846     .thread_should_run = cpuhp_should_run,
847     .thread_fn = cpuhp_thread_fun,
848     .thread_comm = "cpuhp/%u",
849     .selfparking = true,
850 };
851 
852 void __init cpuhp_threads_init(void)
853 {
854     BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
855     kthread_unpark(this_cpu_read(cpuhp_state.thread));
856 }
857 
858 /*
859  *
860  * Serialize hotplug trainwrecks outside of the cpu_hotplug_lock
861  * protected region.
862  *
863  * The operation is still serialized against concurrent CPU hotplug via
864  * cpu_add_remove_lock, i.e. CPU map protection.  But it is _not_
865  * serialized against other hotplug related activity like adding or
866  * removing of state callbacks and state instances, which invoke either the
867  * startup or the teardown callback of the affected state.
868  *
869  * This is required for subsystems which are unfixable vs. CPU hotplug and
870  * evade lock inversion problems by scheduling work which has to be
871  * completed _before_ cpu_up()/_cpu_down() returns.
872  *
873  * Don't even think about adding anything to this for any new code or even
874  * drivers. Its only purpose is to keep existing lock order trainwrecks
875  * working.
876  *
877  * For cpu_down() there might be valid reasons to finish cleanups which are
878  * not required to be done under cpu_hotplug_lock, but that's a different
879  * story and would not be invoked via this.
880  */
881 static void cpu_up_down_serialize_trainwrecks(bool tasks_frozen)
882 {
883     /*
884      * cpusets delegate hotplug operations to a worker to "solve" the
885      * lock order problems. Wait for the worker, but only if tasks are
886      * _not_ frozen (suspend, hibernate) as that would wait forever.
887      *
888      * The wait is required because otherwise the hotplug operation
889      * returns with inconsistent state, which could even be observed in
890      * user space when a new CPU is brought up. The CPU plug uevent
891      * would be delivered and user space reacting on it would fail to
892      * move tasks to the newly plugged CPU up to the point where the
893      * work has finished because up to that point the newly plugged CPU
894      * is not assignable in cpusets/cgroups. On unplug that's not
895      * necessarily a visible issue, but it is still inconsistent state,
896      * which is the real problem which needs to be "fixed". This can't
897      * prevent the transient state between scheduling the work and
898      * returning from waiting for it.
899      */
900     if (!tasks_frozen) {
901         cpuset_wait_for_hotplug();
902     }
903 }
904 
905 #ifdef CONFIG_HOTPLUG_CPU
906 #ifndef arch_clear_mm_cpumask_cpu
907 #define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm))
908 #endif
909 
910 /**
911  * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
912  * @cpu: a CPU id
913  *
914  * This function walks all processes, finds a valid mm struct for each one and
915  * then clears a corresponding bit in mm's cpumask.  While this all sounds
916  * trivial, there are various non-obvious corner cases, which this function
917  * tries to solve in a safe manner.
918  *
919  * Also note that the function uses a somewhat relaxed locking scheme, so it may
920  * be called only for an already offlined CPU.
921  */
922 void clear_tasks_mm_cpumask(int cpu)
923 {
924     struct task_struct *p;
925 
926     /*
927      * This function is called after the cpu is taken down and marked
928      * offline, so it's not like new tasks will ever get this cpu set in
929      * their mm mask. -- Peter Zijlstra
930      * Thus, we may use rcu_read_lock() here, instead of grabbing
931      * full-fledged tasklist_lock.
932      */
933     WARN_ON(cpu_online(cpu));
934     rcu_read_lock();
935     for_each_process(p)
936     {
937         struct task_struct *t;
938 
939         /*
940          * Main thread might exit, but other threads may still have
941          * a valid mm. Find one.
942          */
943         t = find_lock_task_mm(p);
944         if (!t) {
945             continue;
946         }
947         arch_clear_mm_cpumask_cpu(cpu, t->mm);
948         task_unlock(t);
949     }
950     rcu_read_unlock();
951 }
952 
953 /* Take this CPU down. */
954 static int take_cpu_down(void *_param)
955 {
956     struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
957     enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
958     int err, cpu = smp_processor_id();
959     int ret;
960 
961     /* Ensure this CPU doesn't handle any more interrupts. */
962     err = __cpu_disable();
963     if (err < 0) {
964         return err;
965     }
966 
967     /*
968      * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
969      * do this step again.
970      */
971     WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
972     st->state--;
973     /* Invoke the former CPU_DYING callbacks */
974     for (; st->state > target; st->state--) {
975         ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
976         /*
977          * DYING must not fail!
978          */
979         WARN_ON_ONCE(ret);
980     }
981 
982     /* Give up timekeeping duties */
983     tick_handover_do_timer();
984     /* Remove CPU from timer broadcasting */
985     tick_offline_cpu(cpu);
986     /* Park the stopper thread */
987     stop_machine_park(cpu);
988     return 0;
989 }
990 
991 static int takedown_cpu(unsigned int cpu)
992 {
993     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
994     int err;
995 
996     /* Park the smpboot threads */
997     kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
998 
999     /*
1000      * Prevent irq alloc/free while the dying cpu reorganizes the
1001      * interrupt affinities.
1002      */
1003     irq_lock_sparse();
1004 
1005     /*
1006      * So now all preempt/rcu users must observe !cpu_active().
1007      */
1008     err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
1009     if (err) {
1010         /* CPU refused to die */
1011         irq_unlock_sparse();
1012         /* Unpark the hotplug thread so we can rollback there */
1013         kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
1014         return err;
1015     }
1016     BUG_ON(cpu_online(cpu));
1017 
1018     /*
1019      * The teardown callback for CPUHP_AP_SCHED_STARTING will have removed
1020      * all runnable tasks from the CPU, there's only the idle task left now
1021      * that the migration thread is done doing the stop_machine thing.
1022      *
1023      * Wait for the stop thread to go away.
1024      */
1025     wait_for_ap_thread(st, false);
1026     BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
1027 
1028     /* Interrupts are moved away from the dying cpu, reenable alloc/free */
1029     irq_unlock_sparse();
1030 
1031     hotplug_cpu__broadcast_tick_pull(cpu);
1032     /* This actually kills the CPU. */
1033     __cpu_die(cpu);
1034 
1035     tick_cleanup_dead_cpu(cpu);
1036     rcutree_migrate_callbacks(cpu);
1037     return 0;
1038 }
1039 
1040 static void cpuhp_complete_idle_dead(void *arg)
1041 {
1042     struct cpuhp_cpu_state *st = arg;
1043 
1044     complete_ap_thread(st, false);
1045 }
1046 
1047 void cpuhp_report_idle_dead(void)
1048 {
1049     struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1050 
1051     BUG_ON(st->state != CPUHP_AP_OFFLINE);
1052     rcu_report_dead(smp_processor_id());
1053     st->state = CPUHP_AP_IDLE_DEAD;
1054     /*
1055      * We cannot call complete after rcu_report_dead() so we delegate it
1056      * to an online cpu.
1057      */
1058     smp_call_function_single(cpumask_first(cpu_online_mask), cpuhp_complete_idle_dead, st, 0);
1059 }
1060 
1061 static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
1062 {
1063     for (st->state++; st->state < st->target; st->state++) {
1064         cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
1065     }
1066 }
1067 
1068 static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target)
1069 {
1070     enum cpuhp_state prev_state = st->state;
1071     int ret = 0;
1072 
1073     for (; st->state > target; st->state--) {
1074         ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
1075         if (ret) {
1076             st->target = prev_state;
1077             if (st->state < prev_state) {
1078                 undo_cpu_down(cpu, st);
1079             }
1080             break;
1081         }
1082     }
1083     return ret;
1084 }
1085 
1086 /* Requires cpu_add_remove_lock to be held */
1087 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
1088 {
1089     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1090     int prev_state, ret = 0;
1091 
1092     if (num_active_cpus() == 1 && cpu_active(cpu)) {
1093         return -EBUSY;
1094     }
1095 
1096     if (!cpu_present(cpu)) {
1097         return -EINVAL;
1098     }
1099 
1100 #ifdef CONFIG_CPU_ISOLATION_OPT
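    /*
     * With CPU isolation support, refuse (outside of suspend/hibernate) to
     * offline a non-isolated CPU when it is the last online unisolated one.
     */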
1101     if (!tasks_frozen && !cpu_isolated(cpu) && num_online_uniso_cpus() == 1) {
1102         return -EBUSY;
1103     }
1104 #endif
1105 
1106     cpus_write_lock();
1107 
1108     cpuhp_tasks_frozen = tasks_frozen;
1109 
1110     prev_state = cpuhp_set_state(st, target);
1111     /*
1112      * If the current CPU state is in the range of the AP hotplug thread,
1113      * then we need to kick the thread.
1114      */
1115     if (st->state > CPUHP_TEARDOWN_CPU) {
1116         st->target = max((int)target, CPUHP_TEARDOWN_CPU);
1117         ret = cpuhp_kick_ap_work(cpu);
1118         /*
1119          * The AP side has done the error rollback already. Just
1120          * return the error code.
1121          */
1122         if (ret) {
1123             goto out;
1124         }
1125 
1126         /*
1127          * We might have stopped still in the range of the AP hotplug
1128          * thread. Nothing to do anymore.
1129          */
1130         if (st->state > CPUHP_TEARDOWN_CPU) {
1131             goto out;
1132         }
1133 
1134         st->target = target;
1135     }
1136     /*
1137      * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
1138      * to do the further cleanups.
1139      */
1140     ret = cpuhp_down_callbacks(cpu, st, target);
1141     if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) {
1142         cpuhp_reset_state(st, prev_state);
1143         _cpuhp_kick_ap(st);
1144     }
1145 
1146 out:
1147     cpus_write_unlock();
1148     /*
1149      * Do post unplug cleanup. This is still protected against
1150      * concurrent CPU hotplug via cpu_add_remove_lock.
1151      */
1152     lockup_detector_cleanup();
1153     arch_smt_update();
1154     cpu_up_down_serialize_trainwrecks(tasks_frozen);
1155     return ret;
1156 }
1157 
1158 static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
1159 {
1160     if (cpu_hotplug_disabled) {
1161         return -EBUSY;
1162     }
1163     return _cpu_down(cpu, 0, target);
1164 }
1165 
1166 static int cpu_down(unsigned int cpu, enum cpuhp_state target)
1167 {
1168     int err;
1169 
1170     cpu_maps_update_begin();
1171     err = cpu_down_maps_locked(cpu, target);
1172     cpu_maps_update_done();
1173     return err;
1174 }
1175 
1176 /**
1177  * cpu_device_down - Bring down a cpu device
1178  * @dev: Pointer to the cpu device to offline
1179  *
1180  * This function is meant to be used by device core cpu subsystem only.
1181  *
1182  * Other subsystems should use remove_cpu() instead.
1183  */
1184 int cpu_device_down(struct device *dev)
1185 {
1186     return cpu_down(dev->id, CPUHP_OFFLINE);
1187 }
1188 
1189 int remove_cpu(unsigned int cpu)
1190 {
1191     int ret;
1192 
1193     lock_device_hotplug();
1194     ret = device_offline(get_cpu_device(cpu));
1195     unlock_device_hotplug();
1196 
1197     return ret;
1198 }
1199 EXPORT_SYMBOL_GPL(remove_cpu);
1200 
1201 void smp_shutdown_nonboot_cpus(unsigned int primary_cpu)
1202 {
1203     unsigned int cpu;
1204     int error;
1205 
1206     cpu_maps_update_begin();
1207 
1208     /*
1209      * Make certain the cpu I'm about to reboot on is online.
1210      *
1211      * This is in line with what migrate_to_reboot_cpu() already does.
1212      */
1213     if (!cpu_online(primary_cpu)) {
1214         primary_cpu = cpumask_first(cpu_online_mask);
1215     }
1216 
1217     for_each_online_cpu(cpu)
1218     {
1219         if (cpu == primary_cpu) {
1220             continue;
1221         }
1222 
1223         error = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
1224         if (error) {
1225             pr_err("Failed to offline CPU%d - error=%d", cpu, error);
1226             break;
1227         }
1228     }
1229 
1230     /*
1231      * Ensure all but the reboot CPU are offline.
1232      */
1233     BUG_ON(num_online_cpus() > 1);
1234 
1235     /*
1236      * Make sure the CPUs won't be enabled by someone else after this
1237      * point. Kexec will reboot to a new kernel shortly resetting
1238      * everything along the way.
1239      */
1240     cpu_hotplug_disabled++;
1241 
1242     cpu_maps_update_done();
1243 }
1244 
1245 #else
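/* Without CONFIG_HOTPLUG_CPU there is no CPU teardown; the state table below gets a NULL callback. */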
1246 #define takedown_cpu NULL
1247 #endif /* CONFIG_HOTPLUG_CPU */
1248 
1249 /**
1250  * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
1251  * @cpu: cpu that just started
1252  *
1253  * It must be called by the arch code on the new cpu, before the new cpu
1254  * enables interrupts and before the "boot" cpu returns from __cpu_up().
1255  */
1256 void notify_cpu_starting(unsigned int cpu)
1257 {
1258     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1259     enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
1260     int ret;
1261 
1262     rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
1263     cpumask_set_cpu(cpu, &cpus_booted_once_mask);
1264     while (st->state < target) {
1265         st->state++;
1266         ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
1267         /*
1268          * STARTING must not fail!
1269          */
1270         WARN_ON_ONCE(ret);
1271     }
1272 }
1273 
1274 /*
1275  * Called from the idle task. Wake up the controlling task which brings the
1276  * hotplug thread of the upcoming CPU up and then delegates the rest of the
1277  * online bringup to the hotplug thread.
1278  */
1279 void cpuhp_online_idle(enum cpuhp_state state)
1280 {
1281     struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1282 
1283     /* Happens for the boot cpu */
1284     if (state != CPUHP_AP_ONLINE_IDLE) {
1285         return;
1286     }
1287 
1288     /*
1289      * Unpark the stopper thread before we start the idle loop (and start
1290      * scheduling); this ensures the stopper task is always available.
1291      */
1292     stop_machine_unpark(smp_processor_id());
1293 
1294     st->state = CPUHP_AP_ONLINE_IDLE;
1295     complete_ap_thread(st, true);
1296 }
1297 
1298 /* Requires cpu_add_remove_lock to be held */
1299 static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
1300 {
1301     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1302     struct task_struct *idle;
1303     int ret = 0;
1304 
1305     cpus_write_lock();
1306 
1307     if (!cpu_present(cpu)) {
1308         ret = -EINVAL;
1309         goto out;
1310     }
1311 
1312     /*
1313      * The caller of cpu_up() might have raced with another
1314      * caller. Nothing to do.
1315      */
1316     if (st->state >= target) {
1317         goto out;
1318     }
1319 
1320     if (st->state == CPUHP_OFFLINE) {
1321         /* Let it fail before we try to bring the cpu up */
1322         idle = idle_thread_get(cpu);
1323         if (IS_ERR(idle)) {
1324             ret = PTR_ERR(idle);
1325             goto out;
1326         }
1327     }
1328 
1329     cpuhp_tasks_frozen = tasks_frozen;
1330 
1331     cpuhp_set_state(st, target);
1332     /*
1333      * If the current CPU state is in the range of the AP hotplug thread,
1334      * then we need to kick the thread once more.
1335      */
1336     if (st->state > CPUHP_BRINGUP_CPU) {
1337         ret = cpuhp_kick_ap_work(cpu);
1338         /*
1339          * The AP side has done the error rollback already. Just
1340          * return the error code.
1341          */
1342         if (ret) {
1343             goto out;
1344         }
1345     }
1346 
1347     /*
1348      * Try to reach the target state. We max out on the BP at
1349      * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
1350      * responsible for bringing it up to the target state.
1351      */
1352     target = min((int)target, CPUHP_BRINGUP_CPU);
1353     ret = cpuhp_up_callbacks(cpu, st, target);
1354 out:
1355     cpus_write_unlock();
1356     arch_smt_update();
1357     cpu_up_down_serialize_trainwrecks(tasks_frozen);
1358     return ret;
1359 }
1360 
1361 static int cpu_up(unsigned int cpu, enum cpuhp_state target)
1362 {
1363     int err = 0;
1364 
1365     if (!cpu_possible(cpu)) {
1366         pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n", cpu);
1367 #if defined(CONFIG_IA64)
1368         pr_err("please check additional_cpus= boot parameter\n");
1369 #endif
1370         return -EINVAL;
1371     }
1372 
1373     err = try_online_node(cpu_to_node(cpu));
1374     if (err) {
1375         return err;
1376     }
1377 
1378     cpu_maps_update_begin();
1379 
1380     if (cpu_hotplug_disabled) {
1381         err = -EBUSY;
1382         goto out;
1383     }
1384     if (!cpu_smt_allowed(cpu)) {
1385         err = -EPERM;
1386         goto out;
1387     }
1388 
1389     err = _cpu_up(cpu, 0, target);
1390 out:
1391     cpu_maps_update_done();
1392     return err;
1393 }
1394 
1395 /**
1396  * cpu_device_up - Bring up a cpu device
1397  * @dev: Pointer to the cpu device to online
1398  *
1399  * This function is meant to be used by device core cpu subsystem only.
1400  *
1401  * Other subsystems should use add_cpu() instead.
1402  */
1403 int cpu_device_up(struct device *dev)
1404 {
1405     return cpu_up(dev->id, CPUHP_ONLINE);
1406 }
1407 
1408 int add_cpu(unsigned int cpu)
1409 {
1410     int ret;
1411 
1412     lock_device_hotplug();
1413     ret = device_online(get_cpu_device(cpu));
1414     unlock_device_hotplug();
1415 
1416     return ret;
1417 }
1418 EXPORT_SYMBOL_GPL(add_cpu);
1419 
1420 /**
1421  * bringup_hibernate_cpu - Bring up the CPU that we hibernated on
1422  * @sleep_cpu: The cpu we hibernated on and should be brought up.
1423  *
1424  * On some architectures like arm64, we can hibernate on any CPU, but on
1425  * wake up the CPU we hibernated on might be offline as a side effect of
1426  * using maxcpus= for example.
1427  */
1428 int bringup_hibernate_cpu(unsigned int sleep_cpu)
1429 {
1430     int ret;
1431 
1432     if (!cpu_online(sleep_cpu)) {
1433         pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n");
1434         ret = cpu_up(sleep_cpu, CPUHP_ONLINE);
1435         if (ret) {
1436             pr_err("Failed to bring hibernate-CPU up!\n");
1437             return ret;
1438         }
1439     }
1440     return 0;
1441 }
1442 
1443 void bringup_nonboot_cpus(unsigned int setup_max_cpus)
1444 {
1445     unsigned int cpu;
1446 
1447     for_each_present_cpu(cpu)
1448     {
1449         if (num_online_cpus() >= setup_max_cpus) {
1450             break;
1451         }
1452         if (!cpu_online(cpu)) {
1453             cpu_up(cpu, CPUHP_ONLINE);
1454         }
1455     }
1456 }
1457 
1458 #ifdef CONFIG_PM_SLEEP_SMP
1459 static cpumask_var_t frozen_cpus;
1460 
1461 int freeze_secondary_cpus(int primary)
1462 {
1463     int cpu, error = 0;
1464 
1465     cpu_maps_update_begin();
1466     if (primary == -1) {
1467         primary = cpumask_first(cpu_online_mask);
1468         if (!housekeeping_cpu(primary, HK_FLAG_TIMER)) {
1469             primary = housekeeping_any_cpu(HK_FLAG_TIMER);
1470         }
1471     } else {
1472         if (!cpu_online(primary)) {
1473             primary = cpumask_first(cpu_online_mask);
1474         }
1475     }
1476 
1477     /*
1478      * We take down all of the non-boot CPUs in one shot to avoid races
1479      * with the userspace trying to use the CPU hotplug at the same time
1480      */
1481     cpumask_clear(frozen_cpus);
1482 
1483     pr_info("Disabling non-boot CPUs ...\n");
1484     for_each_online_cpu(cpu)
1485     {
1486         if (cpu == primary) {
1487             continue;
1488         }
1489 
1490         if (pm_wakeup_pending()) {
1491             pr_info("Wakeup pending. Abort CPU freeze\n");
1492             error = -EBUSY;
1493             break;
1494         }
1495 
1496         trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
1497         error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
1498         trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
1499         if (!error) {
1500             cpumask_set_cpu(cpu, frozen_cpus);
1501         } else {
1502             pr_err("Error taking CPU%d down: %d\n", cpu, error);
1503             break;
1504         }
1505     }
1506 
1507     if (!error) {
1508         BUG_ON(num_online_cpus() > 1);
1509     } else {
1510         pr_err("Non-boot CPUs are not disabled\n");
1511     }
1512 
1513     /*
1514      * Make sure the CPUs won't be enabled by someone else. We need to do
1515      * this even in case of failure as all freeze_secondary_cpus() users are
1516      * supposed to do thaw_secondary_cpus() on the failure path.
1517      */
1518     cpu_hotplug_disabled++;
1519 
1520     cpu_maps_update_done();
1521     return error;
1522 }
1523 
1524 void __weak arch_thaw_secondary_cpus_begin(void)
1525 {
1526 }
1527 
1528 void __weak arch_thaw_secondary_cpus_end(void)
1529 {
1530 }
1531 
1532 void thaw_secondary_cpus(void)
1533 {
1534     int cpu, error;
1535 
1536     /* Allow everyone to use the CPU hotplug again */
1537     cpu_maps_update_begin();
1538     _cpu_hotplug_enable();
1539     if (cpumask_empty(frozen_cpus)) {
1540         goto out;
1541     }
1542 
1543     pr_info("Enabling non-boot CPUs ...\n");
1544 
1545     arch_thaw_secondary_cpus_begin();
1546 
1547     for_each_cpu(cpu, frozen_cpus)
1548     {
1549         trace_suspend_resume(TPS("CPU_ON"), cpu, true);
1550         error = _cpu_up(cpu, 1, CPUHP_ONLINE);
1551         trace_suspend_resume(TPS("CPU_ON"), cpu, false);
1552         if (!error) {
1553             pr_info("CPU%d is up\n", cpu);
1554             continue;
1555         }
1556         pr_warn("Error taking CPU%d up: %d\n", cpu, error);
1557     }
1558 
1559     arch_thaw_secondary_cpus_end();
1560 
1561     cpumask_clear(frozen_cpus);
1562 out:
1563     cpu_maps_update_done();
1564 }
1565 
1566 static int __init alloc_frozen_cpus(void)
1567 {
1568     if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL | __GFP_ZERO)) {
1569         return -ENOMEM;
1570     }
1571     return 0;
1572 }
1573 core_initcall(alloc_frozen_cpus);
1574 
1575 /*
1576  * When callbacks for CPU hotplug notifications are being executed, we must
1577  * ensure that the state of the system with respect to the tasks being frozen
1578  * or not, as reported by the notification, remains unchanged *throughout the
1579  * duration* of the execution of the callbacks.
1580  * Hence we need to prevent the freezer from racing with regular CPU hotplug.
1581  *
1582  * This synchronization is implemented by mutually excluding regular CPU
1583  * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
1584  * Hibernate notifications.
1585  */
1586 static int cpu_hotplug_pm_callback(struct notifier_block *nb, unsigned long action, void *ptr)
1587 {
1588     switch (action) {
1589         case PM_SUSPEND_PREPARE:
1590         case PM_HIBERNATION_PREPARE:
1591             cpu_hotplug_disable();
1592             break;
1593 
1594         case PM_POST_SUSPEND:
1595         case PM_POST_HIBERNATION:
1596             cpu_hotplug_enable();
1597             break;
1598 
1599         default:
1600             return NOTIFY_DONE;
1601     }
1602 
1603     return NOTIFY_OK;
1604 }
1605 
1606 static int __init cpu_hotplug_pm_sync_init(void)
1607 {
1608     /*
1609      * cpu_hotplug_pm_callback has higher priority than x86
1610      * bsp_pm_callback which depends on cpu_hotplug_pm_callback
1611      * to disable cpu hotplug to avoid cpu hotplug race.
1612      */
1613     pm_notifier(cpu_hotplug_pm_callback, 0);
1614     return 0;
1615 }
1616 core_initcall(cpu_hotplug_pm_sync_init);
1617 
1618 #endif /* CONFIG_PM_SLEEP_SMP */
1619 
1620 int __boot_cpu_id;
1621 
1622 #endif /* CONFIG_SMP */
1623 
1624 /* Boot processor state steps */
1625 static struct cpuhp_step cpuhp_hp_states[] = {
1626     [CPUHP_OFFLINE] =
1627         {
1628             .name = "offline",
1629             .startup.single = NULL,
1630             .teardown.single = NULL,
1631         },
1632 #ifdef CONFIG_SMP
1633     [CPUHP_CREATE_THREADS] =
1634         {
1635             .name = "threads:prepare",
1636             .startup.single = smpboot_create_threads,
1637             .teardown.single = NULL,
1638             .cant_stop = true,
1639         },
1640     [CPUHP_PERF_PREPARE] =
1641         {
1642             .name = "perf:prepare",
1643             .startup.single = perf_event_init_cpu,
1644             .teardown.single = perf_event_exit_cpu,
1645         },
1646     [CPUHP_RANDOM_PREPARE] = {
1647         .name = "random:prepare",
1648         .startup.single = random_prepare_cpu,
1649         .teardown.single = NULL,
1650     },
1651 
1652     [CPUHP_WORKQUEUE_PREP] =
1653         {
1654             .name = "workqueue:prepare",
1655             .startup.single = workqueue_prepare_cpu,
1656             .teardown.single = NULL,
1657         },
1658     [CPUHP_HRTIMERS_PREPARE] =
1659         {
1660             .name = "hrtimers:prepare",
1661             .startup.single = hrtimers_prepare_cpu,
1662             .teardown.single = hrtimers_dead_cpu,
1663         },
1664     [CPUHP_SMPCFD_PREPARE] =
1665         {
1666             .name = "smpcfd:prepare",
1667             .startup.single = smpcfd_prepare_cpu,
1668             .teardown.single = smpcfd_dead_cpu,
1669         },
1670     [CPUHP_RELAY_PREPARE] =
1671         {
1672             .name = "relay:prepare",
1673             .startup.single = relay_prepare_cpu,
1674             .teardown.single = NULL,
1675         },
1676     [CPUHP_SLAB_PREPARE] =
1677         {
1678             .name = "slab:prepare",
1679             .startup.single = slab_prepare_cpu,
1680             .teardown.single = slab_dead_cpu,
1681         },
1682     [CPUHP_RCUTREE_PREP] =
1683         {
1684             .name = "RCU/tree:prepare",
1685             .startup.single = rcutree_prepare_cpu,
1686             .teardown.single = rcutree_dead_cpu,
1687         },
1688     /*
1689      * On the tear-down path, timers_dead_cpu() must be invoked
1690      * before blk_mq_queue_reinit_notify() from notify_dead(),
1691      * otherwise an RCU stall occurs.
1692      */
1693     [CPUHP_TIMERS_PREPARE] =
1694         {
1695             .name = "timers:prepare",
1696             .startup.single = timers_prepare_cpu,
1697             .teardown.single = timers_dead_cpu,
1698         },
1699     /* Kicks the plugged cpu into life */
1700     [CPUHP_BRINGUP_CPU] =
1701         {
1702             .name = "cpu:bringup",
1703             .startup.single = bringup_cpu,
1704             .teardown.single = finish_cpu,
1705             .cant_stop = true,
1706         },
1707     /* Final state before CPU kills itself */
1708     [CPUHP_AP_IDLE_DEAD] =
1709         {
1710             .name = "idle:dead",
1711         },
1712     /*
1713      * Last state before CPU enters the idle loop to die. Transient state
1714      * for synchronization.
1715      */
1716     [CPUHP_AP_OFFLINE] =
1717         {
1718             .name = "ap:offline",
1719             .cant_stop = true,
1720         },
1721     /* First state is scheduler control. Interrupts are disabled */
1722     [CPUHP_AP_SCHED_STARTING] =
1723         {
1724             .name = "sched:starting",
1725             .startup.single = sched_cpu_starting,
1726             .teardown.single = sched_cpu_dying,
1727         },
1728     [CPUHP_AP_RCUTREE_DYING] =
1729         {
1730             .name = "RCU/tree:dying",
1731             .startup.single = NULL,
1732             .teardown.single = rcutree_dying_cpu,
1733         },
1734     [CPUHP_AP_SMPCFD_DYING] =
1735         {
1736             .name = "smpcfd:dying",
1737             .startup.single = NULL,
1738             .teardown.single = smpcfd_dying_cpu,
1739         },
1740     /* Entry state on starting. Interrupts are enabled from here on.
1741      * Transient state for synchronization. */
1742     [CPUHP_AP_ONLINE] =
1743         {
1744             .name = "ap:online",
1745         },
1746     /*
1747      * Handled on the control processor until the plugged processor manages
1748      * this itself.
1749      */
1750     [CPUHP_TEARDOWN_CPU] =
1751         {
1752             .name = "cpu:teardown",
1753             .startup.single = NULL,
1754             .teardown.single = takedown_cpu,
1755             .cant_stop = true,
1756         },
1757     /* Handle smpboot threads park/unpark */
1758     [CPUHP_AP_SMPBOOT_THREADS] =
1759         {
1760             .name = "smpboot/threads:online",
1761             .startup.single = smpboot_unpark_threads,
1762             .teardown.single = smpboot_park_threads,
1763         },
1764     [CPUHP_AP_IRQ_AFFINITY_ONLINE] =
1765         {
1766             .name = "irq/affinity:online",
1767             .startup.single = irq_affinity_online_cpu,
1768             .teardown.single = NULL,
1769         },
1770     [CPUHP_AP_PERF_ONLINE] =
1771         {
1772             .name = "perf:online",
1773             .startup.single = perf_event_init_cpu,
1774             .teardown.single = perf_event_exit_cpu,
1775         },
1776     [CPUHP_AP_WATCHDOG_ONLINE] =
1777         {
1778             .name = "lockup_detector:online",
1779             .startup.single = lockup_detector_online_cpu,
1780             .teardown.single = lockup_detector_offline_cpu,
1781         },
1782     [CPUHP_AP_WORKQUEUE_ONLINE] =
1783         {
1784             .name = "workqueue:online",
1785             .startup.single = workqueue_online_cpu,
1786             .teardown.single = workqueue_offline_cpu,
1787         },
1788     [CPUHP_AP_RANDOM_ONLINE] = {
1789         .name			= "random:online",
1790         .startup.single		= random_online_cpu,
1791         .teardown.single	= NULL,
1792     },
1793     [CPUHP_AP_RCUTREE_ONLINE] =
1794         {
1795             .name = "RCU/tree:online",
1796             .startup.single = rcutree_online_cpu,
1797             .teardown.single = rcutree_offline_cpu,
1798         },
1799 #endif
1800 /*
1801  * The dynamically registered state space is here
1802  */
1803 
1804 #ifdef CONFIG_SMP
1805     /* Last state is scheduler control setting the cpu active */
1806     [CPUHP_AP_ACTIVE] =
1807         {
1808             .name = "sched:active",
1809             .startup.single = sched_cpu_activate,
1810             .teardown.single = sched_cpu_deactivate,
1811         },
1812 #endif
1813 
1814     /* CPU is fully up and running. */
1815     [CPUHP_ONLINE] =
1816         {
1817             .name = "online",
1818             .startup.single = NULL,
1819             .teardown.single = NULL,
1820         },
1821 };
1822 
1823 /* Sanity check for callbacks */
cpuhp_cb_check(enum cpuhp_state state)1824 static int cpuhp_cb_check(enum cpuhp_state state)
1825 {
1826     if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE) {
1827         return -EINVAL;
1828     }
1829     return 0;
1830 }
1831 
1832 /*
1833  * Returns a free slot for dynamic state assignment in the requested range. The
1834  * states are protected by the cpuhp_state_mutex and an empty slot is identified
1835  * by having no name assigned.
1836  */
cpuhp_reserve_state(enum cpuhp_state state)1837 static int cpuhp_reserve_state(enum cpuhp_state state)
1838 {
1839     enum cpuhp_state i, end;
1840     struct cpuhp_step *step;
1841 
1842     switch (state) {
1843         case CPUHP_AP_ONLINE_DYN:
1844             step = cpuhp_hp_states + CPUHP_AP_ONLINE_DYN;
1845             end = CPUHP_AP_ONLINE_DYN_END;
1846             break;
1847         case CPUHP_BP_PREPARE_DYN:
1848             step = cpuhp_hp_states + CPUHP_BP_PREPARE_DYN;
1849             end = CPUHP_BP_PREPARE_DYN_END;
1850             break;
1851         default:
1852             return -EINVAL;
1853     }
1854 
1855     for (i = state; i <= end; i++, step++) {
1856         if (!step->name) {
1857             return i;
1858         }
1859     }
1860     WARN(1, "No more dynamic states available for CPU hotplug\n");
1861     return -ENOSPC;
1862 }
1863 
cpuhp_store_callbacks(enum cpuhp_state state,const char * name,int (* startup)(unsigned int cpu),int (* teardown)(unsigned int cpu),bool multi_instance)1864 static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name, int (*startup)(unsigned int cpu),
1865                                  int (*teardown)(unsigned int cpu), bool multi_instance)
1866 {
1867     /* (Un)Install the callbacks for further cpu hotplug operations */
1868     struct cpuhp_step *sp;
1869     int ret = 0;
1870 
1871     /*
1872      * If name is NULL, then the state gets removed.
1873      *
1874      * CPUHP_AP_ONLINE_DYN and CPUHP_BP_PREPARE_DYN are handed out on
1875      * the first allocation from these dynamic ranges, so the removal
1876      * would trigger a new allocation and clear the wrong (already
1877      * empty) state, leaving the callbacks of the to-be-cleared state
1878      * dangling, which causes wreckage on the next hotplug operation.
1879      */
1880     if (name && (state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN)) {
1881         ret = cpuhp_reserve_state(state);
1882         if (ret < 0) {
1883             return ret;
1884         }
1885         state = ret;
1886     }
1887     sp = cpuhp_get_step(state);
1888     if (name && sp->name) {
1889         return -EBUSY;
1890     }
1891 
1892     sp->startup.single = startup;
1893     sp->teardown.single = teardown;
1894     sp->name = name;
1895     sp->multi_instance = multi_instance;
1896     INIT_HLIST_HEAD(&sp->list);
1897     return ret;
1898 }
1899 
cpuhp_get_teardown_cb(enum cpuhp_state state)1900 static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
1901 {
1902     return cpuhp_get_step(state)->teardown.single;
1903 }
1904 
1905 /*
1906  * Call the startup/teardown function for a step either on the AP or
1907  * on the current CPU.
1908  */
cpuhp_issue_call(int cpu,enum cpuhp_state state,bool bringup,struct hlist_node * node)1909 static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup, struct hlist_node *node)
1910 {
1911     struct cpuhp_step *sp = cpuhp_get_step(state);
1912     int ret;
1913 
1914     /*
1915      * If there's nothing to do, we're done.
1916      * Relies on the union for multi_instance.
1917      */
1918     if ((bringup && !sp->startup.single) || (!bringup && !sp->teardown.single)) {
1919         return 0;
1920     }
1921     /*
1922      * The non-AP-bound callbacks can fail on bringup. On teardown,
1923      * e.g. module removal, we crash for now.
1924      */
1925 #ifdef CONFIG_SMP
1926     if (cpuhp_is_ap_state(state)) {
1927         ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
1928     } else {
1929         ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1930     }
1931 #else
1932     ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1933 #endif
1934     BUG_ON(ret && !bringup);
1935     return ret;
1936 }
1937 
1938 /*
1939  * Called from __cpuhp_setup_state on a recoverable failure.
1940  *
1941  * Note: The teardown callbacks for rollback are not allowed to fail!
1942  */
cpuhp_rollback_install(int failedcpu,enum cpuhp_state state,struct hlist_node * node)1943 static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state, struct hlist_node *node)
1944 {
1945     int cpu;
1946 
1947     /* Roll back the already executed steps on the other cpus */
1948     for_each_present_cpu(cpu)
1949     {
1950         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1951         int cpustate = st->state;
1952 
1953         if (cpu >= failedcpu) {
1954             break;
1955         }
1956 
1957         /* Did we invoke the startup call on that cpu ? */
1958         if (cpustate >= state) {
1959             cpuhp_issue_call(cpu, state, false, node);
1960         }
1961     }
1962 }
1963 
__cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,struct hlist_node * node,bool invoke)1964 int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state, struct hlist_node *node, bool invoke)
1965 {
1966     struct cpuhp_step *sp;
1967     int cpu;
1968     int ret;
1969 
1970     lockdep_assert_cpus_held();
1971 
1972     sp = cpuhp_get_step(state);
1973     if (sp->multi_instance == false) {
1974         return -EINVAL;
1975     }
1976 
1977     mutex_lock(&cpuhp_state_mutex);
1978 
1979     if (!invoke || !sp->startup.multi) {
1980         goto add_node;
1981     }
1982 
1983     /*
1984      * Try to call the startup callback for each present cpu
1985      * depending on the hotplug state of the cpu.
1986      */
1987     for_each_present_cpu(cpu)
1988     {
1989         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1990         int cpustate = st->state;
1991 
1992         if (cpustate < state) {
1993             continue;
1994         }
1995 
1996         ret = cpuhp_issue_call(cpu, state, true, node);
1997         if (ret) {
1998             if (sp->teardown.multi) {
1999                 cpuhp_rollback_install(cpu, state, node);
2000             }
2001             goto unlock;
2002         }
2003     }
2004 add_node:
2005     ret = 0;
2006     hlist_add_head(node, &sp->list);
2007 unlock:
2008     mutex_unlock(&cpuhp_state_mutex);
2009     return ret;
2010 }
2011 
__cpuhp_state_add_instance(enum cpuhp_state state,struct hlist_node * node,bool invoke)2012 int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node, bool invoke)
2013 {
2014     int ret;
2015 
2016     cpus_read_lock();
2017     ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
2018     cpus_read_unlock();
2019     return ret;
2020 }
2021 EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
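/*
 * Illustrative sketch (not part of this file): typical driver-side use of the
 * multi-instance API above, via the cpuhp_setup_state_multi(),
 * cpuhp_state_add_instance() and cpuhp_state_remove_instance() wrappers from
 * <linux/cpuhotplug.h>. The struct and callback names below are hypothetical.
 * Each instance embeds the hlist_node that is handed back to the per-instance
 * callbacks:
 *
 *    struct my_ctx {
 *        struct hlist_node node;
 *    };
 *
 *    static int my_online(unsigned int cpu, struct hlist_node *node)
 *    {
 *        struct my_ctx *ctx = hlist_entry(node, struct my_ctx, node);
 *
 *        return my_ctx_setup_cpu(ctx, cpu);
 *    }
 *
 *    state = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "my:online",
 *                                    my_online, my_offline);
 *    ret = cpuhp_state_add_instance(state, &ctx->node);
 *    ...
 *    cpuhp_state_remove_instance(state, &ctx->node);
 */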
2022 
2023 /**
2024  * __cpuhp_setup_state_cpuslocked - Set up the callbacks for a hotplug machine state
2025  * @state:        The state to setup
2026  * @invoke:        If true, the startup function is invoked for cpus where
2027  *            cpu state >= @state
2028  * @startup:        startup callback function
2029  * @teardown:        teardown callback function
2030  * @multi_instance:    State is set up for multiple instances which get
2031  *            added afterwards.
2032  *
2033  * The caller needs to hold cpus read locked while calling this function.
2034  * Returns:
2035  *   On success:
2036  *      Positive state number if @state is CPUHP_AP_ONLINE_DYN
2037  *      0 for all other states
2038  *   On failure: proper (negative) error code
2039  */
__cpuhp_setup_state_cpuslocked(enum cpuhp_state state,const char * name,bool invoke,int (* startup)(unsigned int cpu),int (* teardown)(unsigned int cpu),bool multi_instance)2040 int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, const char *name, bool invoke,
2041                                    int (*startup)(unsigned int cpu), int (*teardown)(unsigned int cpu),
2042                                    bool multi_instance)
2043 {
2044     int cpu, ret = 0;
2045     bool dynstate;
2046 
2047     lockdep_assert_cpus_held();
2048 
2049     if (cpuhp_cb_check(state) || !name) {
2050         return -EINVAL;
2051     }
2052 
2053     mutex_lock(&cpuhp_state_mutex);
2054 
2055     ret = cpuhp_store_callbacks(state, name, startup, teardown, multi_instance);
2056 
2057     dynstate = state == CPUHP_AP_ONLINE_DYN;
2058     if (ret > 0 && dynstate) {
2059         state = ret;
2060         ret = 0;
2061     }
2062 
2063     if (ret || !invoke || !startup) {
2064         goto out;
2065     }
2066 
2067     /*
2068      * Try to call the startup callback for each present cpu
2069      * depending on the hotplug state of the cpu.
2070      */
2071     for_each_present_cpu(cpu)
2072     {
2073         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2074         int cpustate = st->state;
2075 
2076         if (cpustate < state) {
2077             continue;
2078         }
2079 
2080         ret = cpuhp_issue_call(cpu, state, true, NULL);
2081         if (ret) {
2082             if (teardown) {
2083                 cpuhp_rollback_install(cpu, state, NULL);
2084             }
2085             cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
2086             goto out;
2087         }
2088     }
2089 out:
2090     mutex_unlock(&cpuhp_state_mutex);
2091     /*
2092      * If the requested state is CPUHP_AP_ONLINE_DYN, return the
2093      * dynamically allocated state in case of success.
2094      */
2095     if (!ret && dynstate) {
2096         return state;
2097     }
2098     return ret;
2099 }
2100 EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
2101 
__cpuhp_setup_state(enum cpuhp_state state,const char * name,bool invoke,int (* startup)(unsigned int cpu),int (* teardown)(unsigned int cpu),bool multi_instance)2102 int __cpuhp_setup_state(enum cpuhp_state state, const char *name, bool invoke, int (*startup)(unsigned int cpu),
2103                         int (*teardown)(unsigned int cpu), bool multi_instance)
2104 {
2105     int ret;
2106 
2107     cpus_read_lock();
2108     ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup, teardown, multi_instance);
2109     cpus_read_unlock();
2110     return ret;
2111 }
2112 EXPORT_SYMBOL(__cpuhp_setup_state);
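/*
 * Illustrative sketch (not part of this file): the usual entry point from a
 * driver is the cpuhp_setup_state() wrapper in <linux/cpuhotplug.h>, which
 * ends up in __cpuhp_setup_state() above with invoke == true. The callback
 * and state names below are hypothetical. With CPUHP_AP_ONLINE_DYN the
 * dynamically allocated state number is returned and has to be kept for the
 * matching cpuhp_remove_state() call:
 *
 *    static int my_cpu_online(unsigned int cpu)
 *    {
 *        return 0;
 *    }
 *
 *    static int my_cpu_offline(unsigned int cpu)
 *    {
 *        return 0;
 *    }
 *
 *    ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "subsys:online",
 *                            my_cpu_online, my_cpu_offline);
 *    if (ret < 0)
 *        return ret;
 *    my_hp_state = ret;
 *    ...
 *    cpuhp_remove_state(my_hp_state);
 */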
2113 
__cpuhp_state_remove_instance(enum cpuhp_state state,struct hlist_node * node,bool invoke)2114 int __cpuhp_state_remove_instance(enum cpuhp_state state, struct hlist_node *node, bool invoke)
2115 {
2116     struct cpuhp_step *sp = cpuhp_get_step(state);
2117     int cpu;
2118 
2119     BUG_ON(cpuhp_cb_check(state));
2120 
2121     if (!sp->multi_instance) {
2122         return -EINVAL;
2123     }
2124 
2125     cpus_read_lock();
2126     mutex_lock(&cpuhp_state_mutex);
2127 
2128     if (!invoke || !cpuhp_get_teardown_cb(state)) {
2129         goto remove;
2130     }
2131     /*
2132      * Call the teardown callback for each present cpu depending
2133      * on the hotplug state of the cpu. This function is not
2134      * allowed to fail currently!
2135      */
2136     for_each_present_cpu(cpu)
2137     {
2138         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2139         int cpustate = st->state;
2140 
2141         if (cpustate >= state) {
2142             cpuhp_issue_call(cpu, state, false, node);
2143         }
2144     }
2145 
2146 remove:
2147     hlist_del(node);
2148     mutex_unlock(&cpuhp_state_mutex);
2149     cpus_read_unlock();
2150 
2151     return 0;
2152 }
2153 EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
2154 
2155 /**
2156  * __cpuhp_remove_state_cpuslocked - Remove the callbacks for a hotplug machine state
2157  * @state:    The state to remove
2158  * @invoke:    If true, the teardown function is invoked for cpus where
2159  *        cpu state >= @state
2160  *
2161  * The caller needs to hold cpus read locked while calling this function.
2162  * The teardown callback is currently not allowed to fail. Think
2163  * about module removal!
2164  */
__cpuhp_remove_state_cpuslocked(enum cpuhp_state state,bool invoke)2165 void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
2166 {
2167     struct cpuhp_step *sp = cpuhp_get_step(state);
2168     int cpu;
2169 
2170     BUG_ON(cpuhp_cb_check(state));
2171 
2172     lockdep_assert_cpus_held();
2173 
2174     mutex_lock(&cpuhp_state_mutex);
2175     if (sp->multi_instance) {
2176         WARN(!hlist_empty(&sp->list), "Error: Removing state %d which has instances left.\n", state);
2177         goto remove;
2178     }
2179 
2180     if (!invoke || !cpuhp_get_teardown_cb(state)) {
2181         goto remove;
2182     }
2183 
2184     /*
2185      * Call the teardown callback for each present cpu depending
2186      * on the hotplug state of the cpu. This function is not
2187      * allowed to fail currently!
2188      */
2189     for_each_present_cpu(cpu)
2190     {
2191         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2192         int cpustate = st->state;
2193 
2194         if (cpustate >= state) {
2195             cpuhp_issue_call(cpu, state, false, NULL);
2196         }
2197     }
2198 remove:
2199     cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
2200     mutex_unlock(&cpuhp_state_mutex);
2201 }
2202 EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
2203 
__cpuhp_remove_state(enum cpuhp_state state,bool invoke)2204 void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
2205 {
2206     cpus_read_lock();
2207     __cpuhp_remove_state_cpuslocked(state, invoke);
2208     cpus_read_unlock();
2209 }
2210 EXPORT_SYMBOL(__cpuhp_remove_state);
2211 
2212 #ifdef CONFIG_HOTPLUG_SMT
cpuhp_offline_cpu_device(unsigned int cpu)2213 static void cpuhp_offline_cpu_device(unsigned int cpu)
2214 {
2215     struct device *dev = get_cpu_device(cpu);
2216 
2217     dev->offline = true;
2218     /* Tell user space about the state change */
2219     kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
2220 }
2221 
cpuhp_online_cpu_device(unsigned int cpu)2222 static void cpuhp_online_cpu_device(unsigned int cpu)
2223 {
2224     struct device *dev = get_cpu_device(cpu);
2225 
2226     dev->offline = false;
2227     /* Tell user space about the state change */
2228     kobject_uevent(&dev->kobj, KOBJ_ONLINE);
2229 }
2230 
cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)2231 int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
2232 {
2233     int cpu, ret = 0;
2234 
2235     cpu_maps_update_begin();
2236     for_each_online_cpu(cpu)
2237     {
2238         if (topology_is_primary_thread(cpu)) {
2239             continue;
2240         }
2241         ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
2242         if (ret) {
2243             break;
2244         }
2245         /*
2246          * As this needs to hold the cpu maps lock it's impossible
2247          * to call device_offline() because that ends up calling
2248          * cpu_down(), which takes the cpu maps lock. The cpu maps lock
2249          * needs to be held as this might race against in-kernel
2250          * abusers of the hotplug machinery (thermal management).
2251          *
2252          * So nothing would update device:offline state. That would
2253          * leave the sysfs entry stale and prevent onlining after
2254          * smt control has been changed to 'off' again. This is
2255          * called under the sysfs hotplug lock, so it is properly
2256          * serialized against the regular offline usage.
2257          */
2258         cpuhp_offline_cpu_device(cpu);
2259     }
2260     if (!ret) {
2261         cpu_smt_control = ctrlval;
2262     }
2263     cpu_maps_update_done();
2264     return ret;
2265 }
2266 
cpuhp_smt_enable(void)2267 int cpuhp_smt_enable(void)
2268 {
2269     int cpu, ret = 0;
2270 
2271     cpu_maps_update_begin();
2272     cpu_smt_control = CPU_SMT_ENABLED;
2273     for_each_present_cpu(cpu)
2274     {
2275         /* Skip online CPUs and CPUs on offline nodes */
2276         if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) {
2277             continue;
2278         }
2279         ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
2280         if (ret) {
2281             break;
2282         }
2283         /* See comment in cpuhp_smt_disable() */
2284         cpuhp_online_cpu_device(cpu);
2285     }
2286     cpu_maps_update_done();
2287     return ret;
2288 }
2289 #endif
2290 
2291 #if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
show_cpuhp_state(struct device * dev,struct device_attribute * attr,char * buf)2292 static ssize_t show_cpuhp_state(struct device *dev, struct device_attribute *attr, char *buf)
2293 {
2294     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2295 
2296     return sprintf(buf, "%d\n", st->state);
2297 }
2298 static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);
2299 
write_cpuhp_target(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)2300 static ssize_t write_cpuhp_target(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
2301 {
2302     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2303     struct cpuhp_step *sp;
2304     int target, ret;
2305 
2306     ret = kstrtoint(buf, 10, &target);
2307     if (ret) {
2308         return ret;
2309     }
2310 
2311 #ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
2312     if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE) {
2313         return -EINVAL;
2314     }
2315 #else
2316     if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE) {
2317         return -EINVAL;
2318     }
2319 #endif
2320 
2321     ret = lock_device_hotplug_sysfs();
2322     if (ret) {
2323         return ret;
2324     }
2325 
2326     mutex_lock(&cpuhp_state_mutex);
2327     sp = cpuhp_get_step(target);
2328     ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
2329     mutex_unlock(&cpuhp_state_mutex);
2330     if (ret) {
2331         goto out;
2332     }
2333 
2334     if (st->state < target) {
2335         ret = cpu_up(dev->id, target);
2336     } else {
2337         ret = cpu_down(dev->id, target);
2338     }
2339 out:
2340     unlock_device_hotplug();
2341     return ret ? ret : count;
2342 }
2343 
show_cpuhp_target(struct device * dev,struct device_attribute * attr,char * buf)2344 static ssize_t show_cpuhp_target(struct device *dev, struct device_attribute *attr, char *buf)
2345 {
2346     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2347 
2348     return sprintf(buf, "%d\n", st->target);
2349 }
2350 static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
2351 
write_cpuhp_fail(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)2352 static ssize_t write_cpuhp_fail(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
2353 {
2354     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2355     struct cpuhp_step *sp;
2356     int fail, ret;
2357 
2358     ret = kstrtoint(buf, 10, &fail);
2359     if (ret) {
2360         return ret;
2361     }
2362 
2363     if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE) {
2364         return -EINVAL;
2365     }
2366 
2367     /*
2368      * Cannot fail STARTING/DYING callbacks.
2369      */
2370     if (cpuhp_is_atomic_state(fail)) {
2371         return -EINVAL;
2372     }
2373 
2374     /*
2375      * Cannot fail anything that doesn't have callbacks.
2376      */
2377     mutex_lock(&cpuhp_state_mutex);
2378     sp = cpuhp_get_step(fail);
2379     if (!sp->startup.single && !sp->teardown.single) {
2380         ret = -EINVAL;
2381     }
2382     mutex_unlock(&cpuhp_state_mutex);
2383     if (ret) {
2384         return ret;
2385     }
2386 
2387     st->fail = fail;
2388 
2389     return count;
2390 }
2391 
show_cpuhp_fail(struct device * dev,struct device_attribute * attr,char * buf)2392 static ssize_t show_cpuhp_fail(struct device *dev, struct device_attribute *attr, char *buf)
2393 {
2394     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2395 
2396     return sprintf(buf, "%d\n", st->fail);
2397 }
2398 
2399 static DEVICE_ATTR(fail, 0644, show_cpuhp_fail, write_cpuhp_fail);
2400 
2401 static struct attribute *cpuhp_cpu_attrs[] = {&dev_attr_state.attr, &dev_attr_target.attr, &dev_attr_fail.attr, NULL};
2402 
2403 static const struct attribute_group cpuhp_cpu_attr_group = {.attrs = cpuhp_cpu_attrs, .name = "hotplug", NULL};
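/*
 * Illustrative note: the group above is what appears per CPU in sysfs as
 * /sys/devices/system/cpu/cpuN/hotplug/{state,target,fail} (assuming the
 * usual sysfs mount point). "state" reports the current hotplug state number,
 * writing a state number to "target" brings the CPU up or down to that state
 * (restricted to CPUHP_OFFLINE/CPUHP_ONLINE unless
 * CONFIG_CPU_HOTPLUG_STATE_CONTROL is enabled), and "fail" arms the fault
 * injection hook checked in cpuhp_invoke_callback() so that the callback of
 * the given state fails during a later hotplug operation. For example:
 *
 *    cat /sys/devices/system/cpu/cpu1/hotplug/state
 *    echo 0 > /sys/devices/system/cpu/cpu1/hotplug/target
 */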
2404 
show_cpuhp_states(struct device * dev,struct device_attribute * attr,char * buf)2405 static ssize_t show_cpuhp_states(struct device *dev, struct device_attribute *attr, char *buf)
2406 {
2407     ssize_t cur, res = 0;
2408     int i;
2409 
2410     mutex_lock(&cpuhp_state_mutex);
2411     for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
2412         struct cpuhp_step *sp = cpuhp_get_step(i);
2413 
2414         if (sp->name) {
2415             cur = sprintf(buf, "%3d: %s\n", i, sp->name);
2416             buf += cur;
2417             res += cur;
2418         }
2419     }
2420     mutex_unlock(&cpuhp_state_mutex);
2421     return res;
2422 }
2423 static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);
2424 
2425 static struct attribute *cpuhp_cpu_root_attrs[] = {&dev_attr_states.attr, NULL};
2426 
2427 static const struct attribute_group cpuhp_cpu_root_attr_group = {
2428     .attrs = cpuhp_cpu_root_attrs, .name = "hotplug", NULL};
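/*
 * Illustrative note: this root group appears once as
 * /sys/devices/system/cpu/hotplug/states (assuming the usual sysfs mount
 * point) and lists every named state with its number, which is handy for
 * picking values to write into the per-CPU "target" and "fail" files
 * described above, e.g.:
 *
 *    cat /sys/devices/system/cpu/hotplug/states
 */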
2429 
2430 #ifdef CONFIG_HOTPLUG_SMT
2431 
_store_smt_control(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)2432 static ssize_t _store_smt_control(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
2433 {
2434     int ctrlval, ret;
2435 
2436     if (sysfs_streq(buf, "on")) {
2437         ctrlval = CPU_SMT_ENABLED;
2438     } else if (sysfs_streq(buf, "off")) {
2439         ctrlval = CPU_SMT_DISABLED;
2440     } else if (sysfs_streq(buf, "forceoff")) {
2441         ctrlval = CPU_SMT_FORCE_DISABLED;
2442     } else {
2443         return -EINVAL;
2444     }
2445 
2446     if (cpu_smt_control == CPU_SMT_FORCE_DISABLED) {
2447         return -EPERM;
2448     }
2449 
2450     if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED) {
2451         return -ENODEV;
2452     }
2453 
2454     ret = lock_device_hotplug_sysfs();
2455     if (ret) {
2456         return ret;
2457     }
2458 
2459     if (ctrlval != cpu_smt_control) {
2460         switch (ctrlval) {
2461             case CPU_SMT_ENABLED:
2462                 ret = cpuhp_smt_enable();
2463                 break;
2464             case CPU_SMT_DISABLED:
2465             case CPU_SMT_FORCE_DISABLED:
2466                 ret = cpuhp_smt_disable(ctrlval);
2467                 break;
2468         }
2469     }
2470 
2471     unlock_device_hotplug();
2472     return ret ? ret : count;
2473 }
2474 
2475 #else  /* !CONFIG_HOTPLUG_SMT */
_store_smt_control(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)2476 static ssize_t _store_smt_control(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
2477 {
2478     return -ENODEV;
2479 }
2480 #endif /* CONFIG_HOTPLUG_SMT */
2481 
2482 static const char *smt_states[] = {
2483     [CPU_SMT_ENABLED] = "on",
2484     [CPU_SMT_DISABLED] = "off",
2485     [CPU_SMT_FORCE_DISABLED] = "forceoff",
2486     [CPU_SMT_NOT_SUPPORTED] = "notsupported",
2487     [CPU_SMT_NOT_IMPLEMENTED] = "notimplemented",
2488 };
2489 
show_smt_control(struct device * dev,struct device_attribute * attr,char * buf)2490 static ssize_t show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
2491 {
2492     const char *state = smt_states[cpu_smt_control];
2493 
2494     return snprintf(buf, PAGE_SIZE - CPU_PAGE_SIZE_OFF_TWO, "%s\n", state);
2495 }
2496 
store_smt_control(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)2497 static ssize_t store_smt_control(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
2498 {
2499     return _store_smt_control(dev, attr, buf, count);
2500 }
2501 static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
2502 
show_smt_active(struct device * dev,struct device_attribute * attr,char * buf)2503 static ssize_t show_smt_active(struct device *dev, struct device_attribute *attr, char *buf)
2504 {
2505     return snprintf(buf, PAGE_SIZE - CPU_PAGE_SIZE_OFF_TWO, "%d\n", sched_smt_active());
2506 }
2507 static DEVICE_ATTR(active, 0444, show_smt_active, NULL);
2508 
2509 static struct attribute *cpuhp_smt_attrs[] = {&dev_attr_control.attr, &dev_attr_active.attr, NULL};
2510 
2511 static const struct attribute_group cpuhp_smt_attr_group = {.attrs = cpuhp_smt_attrs, .name = "smt", NULL};
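/*
 * Illustrative note: the "smt" group above is exposed as
 * /sys/devices/system/cpu/smt/{control,active} (assuming the usual sysfs
 * mount point). "control" accepts "on", "off" and "forceoff" as parsed in
 * _store_smt_control() and reads back one of the smt_states[] strings, while
 * "active" reports whether the scheduler currently sees SMT siblings. For
 * example:
 *
 *    echo off > /sys/devices/system/cpu/smt/control
 *    cat /sys/devices/system/cpu/smt/active
 */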
2512 
cpu_smt_sysfs_init(void)2513 static int __init cpu_smt_sysfs_init(void)
2514 {
2515     return sysfs_create_group(&cpu_subsys.dev_root->kobj, &cpuhp_smt_attr_group);
2516 }
2517 
cpuhp_sysfs_init(void)2518 static int __init cpuhp_sysfs_init(void)
2519 {
2520     int cpu, ret;
2521 
2522     ret = cpu_smt_sysfs_init();
2523     if (ret) {
2524         return ret;
2525     }
2526 
2527     ret = sysfs_create_group(&cpu_subsys.dev_root->kobj, &cpuhp_cpu_root_attr_group);
2528     if (ret) {
2529         return ret;
2530     }
2531 
2532     for_each_possible_cpu(cpu)
2533     {
2534         struct device *dev = get_cpu_device(cpu);
2535 
2536         if (!dev) {
2537             continue;
2538         }
2539         ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
2540         if (ret) {
2541             return ret;
2542         }
2543     }
2544     return 0;
2545 }
2546 device_initcall(cpuhp_sysfs_init);
2547 #endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */
2548 
2549 /*
2550  * cpu_bit_bitmap[] is a special, "compressed" data structure that
2551  * represents, for each bit number nr, the NR_CPUS-bit value 1<<nr.
2552  *
2553  * It is used by cpumask_of() to get a constant address to a CPU
2554  * mask value that has a single bit set only.
2555  */
2556 
2557 /* cpu_bit_bitmap[0] is empty - so we can back into it */
2558 #define MASK_DECLARE_1(x) [(x) + 1][0] = (1UL << (x))
2559 #define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1((x) + 1)
2560 #define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2((x) + 2)
2561 #define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4((x) + 4)
2562 
2563 const unsigned long cpu_bit_bitmap[BITS_PER_LONG + 1][BITS_TO_LONGS(NR_CPUS)] = {
2564 
2565     MASK_DECLARE_8(0),  MASK_DECLARE_8(8),  MASK_DECLARE_8(16), MASK_DECLARE_8(24),
2566 #if BITS_PER_LONG > 32
2567     MASK_DECLARE_8(32), MASK_DECLARE_8(40), MASK_DECLARE_8(48), MASK_DECLARE_8(56),
2568 #endif
2569 };
2570 EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
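/*
 * Illustrative sketch of how the table above is consumed: in current kernels
 * cpumask_of() resolves to get_cpu_mask() in <linux/cpumask.h>, which picks
 * the row whose word 0 has the wanted bit set and then steps the pointer back
 * so that this word ends up at the right offset inside the returned mask,
 * backing into the all-zero tail words of the lower rows (and ultimately the
 * empty row 0), roughly:
 *
 *    const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
 *
 *    p -= cpu / BITS_PER_LONG;
 *    return to_cpumask(p);
 */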
2571 
2572 const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
2573 EXPORT_SYMBOL(cpu_all_bits);
2574 
2575 #ifdef CONFIG_INIT_ALL_POSSIBLE
2576 struct cpumask __cpu_possible_mask __read_mostly = {CPU_BITS_ALL};
2577 #else
2578 struct cpumask __cpu_possible_mask __read_mostly;
2579 #endif
2580 EXPORT_SYMBOL(__cpu_possible_mask);
2581 
2582 struct cpumask __cpu_online_mask __read_mostly;
2583 EXPORT_SYMBOL(__cpu_online_mask);
2584 
2585 struct cpumask __cpu_present_mask __read_mostly;
2586 EXPORT_SYMBOL(__cpu_present_mask);
2587 
2588 struct cpumask __cpu_active_mask __read_mostly;
2589 EXPORT_SYMBOL(__cpu_active_mask);
2590 
2591 #ifdef CONFIG_CPU_ISOLATION_OPT
2592 struct cpumask __cpu_isolated_mask __read_mostly;
2593 EXPORT_SYMBOL(__cpu_isolated_mask);
2594 #endif
2595 
2596 atomic_t __num_online_cpus __read_mostly;
2597 EXPORT_SYMBOL(__num_online_cpus);
2598 
init_cpu_present(const struct cpumask * src)2599 void init_cpu_present(const struct cpumask *src)
2600 {
2601     cpumask_copy(&__cpu_present_mask, src);
2602 }
2603 
init_cpu_possible(const struct cpumask * src)2604 void init_cpu_possible(const struct cpumask *src)
2605 {
2606     cpumask_copy(&__cpu_possible_mask, src);
2607 }
2608 
init_cpu_online(const struct cpumask * src)2609 void init_cpu_online(const struct cpumask *src)
2610 {
2611     cpumask_copy(&__cpu_online_mask, src);
2612 }
2613 
2614 #ifdef CONFIG_CPU_ISOLATION_OPT
init_cpu_isolated(const struct cpumask * src)2615 void init_cpu_isolated(const struct cpumask *src)
2616 {
2617     cpumask_copy(&__cpu_isolated_mask, src);
2618 }
2619 #endif
2620 
set_cpu_online(unsigned int cpu,bool online)2621 void set_cpu_online(unsigned int cpu, bool online)
2622 {
2623     /*
2624      * atomic_inc/dec() is required to handle the horrid abuse of this
2625      * function by the reboot and kexec code which invoke it from
2626      * IPI/NMI broadcasts when shutting down CPUs. Invocation from
2627      * regular CPU hotplug is properly serialized.
2628      *
2629      * Note that the fact that __num_online_cpus is of type atomic_t
2630      * does not protect readers which are not serialized against
2631      * concurrent hotplug operations.
2632      */
2633     if (online) {
2634         if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask)) {
2635             atomic_inc(&__num_online_cpus);
2636         }
2637     } else {
2638         if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask)) {
2639             atomic_dec(&__num_online_cpus);
2640         }
2641     }
2642 }
2643 
2644 /*
2645  * Activate the first processor.
2646  */
boot_cpu_init(void)2647 void __init boot_cpu_init(void)
2648 {
2649     int cpu = smp_processor_id();
2650 
2651     /* Mark the boot cpu "present", "online" etc. for the SMP and UP cases */
2652     set_cpu_online(cpu, true);
2653     set_cpu_active(cpu, true);
2654     set_cpu_present(cpu, true);
2655     set_cpu_possible(cpu, true);
2656 
2657 #ifdef CONFIG_SMP
2658     __boot_cpu_id = cpu;
2659 #endif
2660 }
2661 
2662 /*
2663  * Must be called _AFTER_ setting up the per_cpu areas
2664  */
boot_cpu_hotplug_init(void)2665 void __init boot_cpu_hotplug_init(void)
2666 {
2667 #ifdef CONFIG_SMP
2668     cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
2669 #endif
2670     this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
2671 }
2672 
2673 /*
2674  * These are used for a global "mitigations=" cmdline option for toggling
2675  * optional CPU mitigations.
2676  */
2677 enum cpu_mitigations {
2678     CPU_MITIGATIONS_OFF,
2679     CPU_MITIGATIONS_AUTO,
2680     CPU_MITIGATIONS_AUTO_NOSMT,
2681 };
2682 
2683 static enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO;
2684 
mitigations_parse_cmdline(char * arg)2685 static int __init mitigations_parse_cmdline(char *arg)
2686 {
2687     if (!strcmp(arg, "off")) {
2688         cpu_mitigations = CPU_MITIGATIONS_OFF;
2689     } else if (!strcmp(arg, "auto")) {
2690         cpu_mitigations = CPU_MITIGATIONS_AUTO;
2691     } else if (!strcmp(arg, "auto,nosmt")) {
2692         cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
2693     } else {
2694         pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n", arg);
2695     }
2696 
2697     return 0;
2698 }
2699 early_param("mitigations", mitigations_parse_cmdline);
2700 
2701 /* mitigations=off */
cpu_mitigations_off(void)2702 bool cpu_mitigations_off(void)
2703 {
2704     return cpu_mitigations == CPU_MITIGATIONS_OFF;
2705 }
2706 EXPORT_SYMBOL_GPL(cpu_mitigations_off);
2707 
2708 /* mitigations=auto,nosmt */
cpu_mitigations_auto_nosmt(void)2709 bool cpu_mitigations_auto_nosmt(void)
2710 {
2711     return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
2712 }
2713 EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
2714