• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* CPU control.
2  * (C) 2001, 2002, 2003, 2004 Rusty Russell
3  *
4  * This code is licenced under the GPL.
5  */
6 #include <linux/sched/mm.h>
7 #include <linux/proc_fs.h>
8 #include <linux/smp.h>
9 #include <linux/init.h>
10 #include <linux/notifier.h>
11 #include <linux/sched/signal.h>
12 #include <linux/sched/hotplug.h>
13 #include <linux/sched/isolation.h>
14 #include <linux/sched/task.h>
15 #include <linux/sched/smt.h>
16 #include <linux/unistd.h>
17 #include <linux/cpu.h>
18 #include <linux/oom.h>
19 #include <linux/rcupdate.h>
20 #include <linux/export.h>
21 #include <linux/bug.h>
22 #include <linux/kthread.h>
23 #include <linux/stop_machine.h>
24 #include <linux/mutex.h>
25 #include <linux/gfp.h>
26 #include <linux/suspend.h>
27 #include <linux/lockdep.h>
28 #include <linux/tick.h>
29 #include <linux/irq.h>
30 #include <linux/nmi.h>
31 #include <linux/smpboot.h>
32 #include <linux/relay.h>
33 #include <linux/slab.h>
34 #include <linux/percpu-rwsem.h>
35 #include <linux/cpuset.h>
36 
37 #include <trace/events/power.h>
38 #define CREATE_TRACE_POINTS
39 #include <trace/events/cpuhp.h>
40 
41 #include "smpboot.h"
42 
/**
 * struct cpuhp_cpu_state - Per cpu hotplug state storage
 * @state:	The current cpu state
 * @target:	The target state
 * @fail:	State at which cpuhp_invoke_callback() returns -EAGAIN instead
 *		of invoking an installed callback; reset to CPUHP_INVALID
 *		once that state has been hit
 * @thread:	Pointer to the hotplug thread
 * @should_run:	Thread should execute
 * @rollback:	Perform a rollback
 * @single:	Single callback invocation
 * @bringup:	Single callback bringup or teardown selector
 * @node:	Instance node passed to the callback by the hotplug thread
 *		(set for single multi-instance invocations)
 * @last:	Instance node at which a multi-instance invocation failed;
 *		bounds the partial rollback, NULL otherwise
 * @cb_state:	The state for a single callback (install/uninstall)
 * @result:	Result of the operation
 * @done_up:	Signal completion to the issuer of the task for cpu-up
 * @done_down:	Signal completion to the issuer of the task for cpu-down
 */
struct cpuhp_cpu_state {
	enum cpuhp_state	state;
	enum cpuhp_state	target;
	enum cpuhp_state	fail;
#ifdef CONFIG_SMP
	struct task_struct	*thread;
	bool			should_run;
	bool			rollback;
	bool			single;
	bool			bringup;
	struct hlist_node	*node;
	struct hlist_node	*last;
	enum cpuhp_state	cb_state;
	int			result;
	struct completion	done_up;
	struct completion	done_down;
#endif
};
75 
/*
 * Per-CPU hotplug bookkeeping. .fail starts out as CPUHP_INVALID so that the
 * st->fail == state check in cpuhp_invoke_callback() does not trigger until
 * .fail is set to a real state (assumes CPUHP_INVALID is never passed in as
 * a state - TODO confirm against enum cpuhp_state).
 */
static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
	.fail = CPUHP_INVALID,
};
79 
80 #ifdef CONFIG_SMP
/*
 * Tested by cpu_smt_allowed(). Per the comment in bringup_wait_for_ap(), a
 * CPU marks itself as booted once in notify_cpu_starting().
 */
cpumask_t cpus_booted_once_mask;
82 #endif
83 
84 #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
/*
 * Lockdep maps, one per hotplug direction. They are only acquired/released
 * through cpuhp_lock_acquire()/cpuhp_lock_release().
 */
static struct lockdep_map cpuhp_state_up_map =
	STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
static struct lockdep_map cpuhp_state_down_map =
	STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
89 
90 
cpuhp_lock_acquire(bool bringup)91 static inline void cpuhp_lock_acquire(bool bringup)
92 {
93 	lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
94 }
95 
cpuhp_lock_release(bool bringup)96 static inline void cpuhp_lock_release(bool bringup)
97 {
98 	lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
99 }
100 #else
101 
/* No-op variants used unless both CONFIG_LOCKDEP and CONFIG_SMP are set. */
static inline void cpuhp_lock_acquire(bool bringup) { }
static inline void cpuhp_lock_release(bool bringup) { }
104 
105 #endif
106 
/**
 * struct cpuhp_step - Hotplug state machine step
 * @name:	Name of the step
 * @startup:	Startup function of the step
 * @startup.single:	Startup callback used when @multi_instance is false
 * @startup.multi:	Startup callback used when @multi_instance is true;
 *			invoked with an instance node
 * @teardown:	Teardown function of the step
 * @teardown.single:	Teardown callback used when @multi_instance is false
 * @teardown.multi:	Teardown callback used when @multi_instance is true;
 *			invoked with an instance node
 * @list:	Instance nodes walked by cpuhp_invoke_callback() on a state
 *		transition of a multi-instance step
 * @cant_stop:	Bringup/teardown can't be stopped at this step
 * @multi_instance:	Step uses the multi callbacks and @list
 */
struct cpuhp_step {
	const char		*name;
	union {
		int		(*single)(unsigned int cpu);
		int		(*multi)(unsigned int cpu,
					 struct hlist_node *node);
	} startup;
	union {
		int		(*single)(unsigned int cpu);
		int		(*multi)(unsigned int cpu,
					 struct hlist_node *node);
	} teardown;
	struct hlist_head	list;
	bool			cant_stop;
	bool			multi_instance;
};
130 
/* NOTE(review): users are not visible in this part of the file - confirm what it protects. */
static DEFINE_MUTEX(cpuhp_state_mutex);
/*
 * Tentative declaration of the step table indexed by enum cpuhp_state (see
 * cpuhp_get_step()); the definition is not in this part of the file.
 */
static struct cpuhp_step cpuhp_hp_states[];
133 
cpuhp_get_step(enum cpuhp_state state)134 static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
135 {
136 	return cpuhp_hp_states + state;
137 }
138 
139 /**
140  * cpuhp_invoke_callback _ Invoke the callbacks for a given state
141  * @cpu:	The cpu for which the callback should be invoked
142  * @state:	The state to do callbacks for
143  * @bringup:	True if the bringup callback should be invoked
144  * @node:	For multi-instance, do a single entry callback for install/remove
145  * @lastp:	For multi-instance rollback, remember how far we got
146  *
147  * Called from cpu hotplug and from the state register machinery.
148  */
cpuhp_invoke_callback(unsigned int cpu,enum cpuhp_state state,bool bringup,struct hlist_node * node,struct hlist_node ** lastp)149 static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
150 				 bool bringup, struct hlist_node *node,
151 				 struct hlist_node **lastp)
152 {
153 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
154 	struct cpuhp_step *step = cpuhp_get_step(state);
155 	int (*cbm)(unsigned int cpu, struct hlist_node *node);
156 	int (*cb)(unsigned int cpu);
157 	int ret, cnt;
158 
159 	if (st->fail == state) {
160 		st->fail = CPUHP_INVALID;
161 
162 		if (!(bringup ? step->startup.single : step->teardown.single))
163 			return 0;
164 
165 		return -EAGAIN;
166 	}
167 
168 	if (!step->multi_instance) {
169 		WARN_ON_ONCE(lastp && *lastp);
170 		cb = bringup ? step->startup.single : step->teardown.single;
171 		if (!cb)
172 			return 0;
173 		trace_cpuhp_enter(cpu, st->target, state, cb);
174 		ret = cb(cpu);
175 		trace_cpuhp_exit(cpu, st->state, state, ret);
176 		return ret;
177 	}
178 	cbm = bringup ? step->startup.multi : step->teardown.multi;
179 	if (!cbm)
180 		return 0;
181 
182 	/* Single invocation for instance add/remove */
183 	if (node) {
184 		WARN_ON_ONCE(lastp && *lastp);
185 		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
186 		ret = cbm(cpu, node);
187 		trace_cpuhp_exit(cpu, st->state, state, ret);
188 		return ret;
189 	}
190 
191 	/* State transition. Invoke on all instances */
192 	cnt = 0;
193 	hlist_for_each(node, &step->list) {
194 		if (lastp && node == *lastp)
195 			break;
196 
197 		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
198 		ret = cbm(cpu, node);
199 		trace_cpuhp_exit(cpu, st->state, state, ret);
200 		if (ret) {
201 			if (!lastp)
202 				goto err;
203 
204 			*lastp = node;
205 			return ret;
206 		}
207 		cnt++;
208 	}
209 	if (lastp)
210 		*lastp = NULL;
211 	return 0;
212 err:
213 	/* Rollback the instances if one failed */
214 	cbm = !bringup ? step->startup.multi : step->teardown.multi;
215 	if (!cbm)
216 		return ret;
217 
218 	hlist_for_each(node, &step->list) {
219 		if (!cnt--)
220 			break;
221 
222 		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
223 		ret = cbm(cpu, node);
224 		trace_cpuhp_exit(cpu, st->state, state, ret);
225 		/*
226 		 * Rollback must not fail,
227 		 */
228 		WARN_ON_ONCE(ret);
229 	}
230 	return ret;
231 }
232 
233 #ifdef CONFIG_SMP
cpuhp_is_ap_state(enum cpuhp_state state)234 static bool cpuhp_is_ap_state(enum cpuhp_state state)
235 {
236 	/*
237 	 * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
238 	 * purposes as that state is handled explicitly in cpu_down.
239 	 */
240 	return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
241 }
242 
wait_for_ap_thread(struct cpuhp_cpu_state * st,bool bringup)243 static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
244 {
245 	struct completion *done = bringup ? &st->done_up : &st->done_down;
246 	wait_for_completion(done);
247 }
248 
complete_ap_thread(struct cpuhp_cpu_state * st,bool bringup)249 static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
250 {
251 	struct completion *done = bringup ? &st->done_up : &st->done_down;
252 	complete(done);
253 }
254 
255 /*
256  * The former STARTING/DYING states, ran with IRQs disabled and must not fail.
257  */
cpuhp_is_atomic_state(enum cpuhp_state state)258 static bool cpuhp_is_atomic_state(enum cpuhp_state state)
259 {
260 	return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
261 }
262 
/* Serializes the updates to cpu_online_mask, cpu_present_mask */
static DEFINE_MUTEX(cpu_add_remove_lock);
/* Written by _cpu_down() from its tasks_frozen argument. */
bool cpuhp_tasks_frozen;
EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
267 
/*
 * The following two APIs (cpu_maps_update_begin/done) must be used when
 * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
 *
 * cpu_maps_update_begin() takes cpu_add_remove_lock.
 */
void cpu_maps_update_begin(void)
{
	mutex_lock(&cpu_add_remove_lock);
}
276 
/* Counterpart of cpu_maps_update_begin(): drops cpu_add_remove_lock. */
void cpu_maps_update_done(void)
{
	mutex_unlock(&cpu_add_remove_lock);
}
281 
/*
 * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
 * Should always be manipulated under cpu_add_remove_lock.
 *
 * This is a nesting count: cpu_hotplug_disable() increments it and
 * __cpu_hotplug_enable() decrements it.
 */
static int cpu_hotplug_disabled;
287 
288 #ifdef CONFIG_HOTPLUG_CPU
289 
/*
 * The hotplug lock. Read side: cpus_read_lock()/cpus_read_trylock()/
 * cpus_read_unlock(); write side: cpus_write_lock()/cpus_write_unlock().
 */
DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
291 
/* Take cpu_hotplug_lock for reading. */
void cpus_read_lock(void)
{
	percpu_down_read(&cpu_hotplug_lock);
}
EXPORT_SYMBOL_GPL(cpus_read_lock);
297 
/*
 * Try to take cpu_hotplug_lock for reading; returns whatever
 * percpu_down_read_trylock() returns.
 */
int cpus_read_trylock(void)
{
	return percpu_down_read_trylock(&cpu_hotplug_lock);
}
EXPORT_SYMBOL_GPL(cpus_read_trylock);
303 
/* Drop the read side of cpu_hotplug_lock. */
void cpus_read_unlock(void)
{
	percpu_up_read(&cpu_hotplug_lock);
}
EXPORT_SYMBOL_GPL(cpus_read_unlock);
309 
/* Take cpu_hotplug_lock for writing; used by _cpu_down(). */
void cpus_write_lock(void)
{
	percpu_down_write(&cpu_hotplug_lock);
}
314 
/* Drop the write side of cpu_hotplug_lock. */
void cpus_write_unlock(void)
{
	percpu_up_write(&cpu_hotplug_lock);
}
319 
lockdep_assert_cpus_held(void)320 void lockdep_assert_cpus_held(void)
321 {
322 	/*
323 	 * We can't have hotplug operations before userspace starts running,
324 	 * and some init codepaths will knowingly not take the hotplug lock.
325 	 * This is all valid, so mute lockdep until it makes sense to report
326 	 * unheld locks.
327 	 */
328 	if (system_state < SYSTEM_RUNNING)
329 		return;
330 
331 	percpu_rwsem_assert_held(&cpu_hotplug_lock);
332 }
333 
/*
 * Annotate cpu_hotplug_lock's dep_map as acquired in the current context.
 * Used by cpuhp_thread_fun(), where (per the comment there) the lock itself
 * is held by the BP.
 */
static void lockdep_acquire_cpus_lock(void)
{
	rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
}
338 
/* Undo the annotation made by lockdep_acquire_cpus_lock(). */
static void lockdep_release_cpus_lock(void)
{
	rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
}
343 
344 /*
345  * Wait for currently running CPU hotplug operations to complete (if any) and
346  * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
347  * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
348  * hotplug path before performing hotplug operations. So acquiring that lock
349  * guarantees mutual exclusion from any currently running hotplug operations.
350  */
cpu_hotplug_disable(void)351 void cpu_hotplug_disable(void)
352 {
353 	cpu_maps_update_begin();
354 	cpu_hotplug_disabled++;
355 	cpu_maps_update_done();
356 }
357 EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
358 
__cpu_hotplug_enable(void)359 static void __cpu_hotplug_enable(void)
360 {
361 	if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
362 		return;
363 	cpu_hotplug_disabled--;
364 }
365 
cpu_hotplug_enable(void)366 void cpu_hotplug_enable(void)
367 {
368 	cpu_maps_update_begin();
369 	__cpu_hotplug_enable();
370 	cpu_maps_update_done();
371 }
372 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
373 
374 #else
375 
/* Without CONFIG_HOTPLUG_CPU there is no cpu_hotplug_lock to annotate. */
static void lockdep_acquire_cpus_lock(void)
{
}

static void lockdep_release_cpus_lock(void)
{
}
383 
384 #endif	/* CONFIG_HOTPLUG_CPU */
385 
/*
 * Architectures that need SMT-specific errata handling during SMT hotplug
 * should override this. The weak default does nothing.
 */
void __weak arch_smt_update(void) { }
391 
392 #ifdef CONFIG_HOTPLUG_SMT
/*
 * Current SMT control setting. Starts as CPU_SMT_ENABLED; changed by
 * cpu_smt_disable() and cpu_smt_check_topology().
 */
enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
394 
cpu_smt_disable(bool force)395 void __init cpu_smt_disable(bool force)
396 {
397 	if (!cpu_smt_possible())
398 		return;
399 
400 	if (force) {
401 		pr_info("SMT: Force disabled\n");
402 		cpu_smt_control = CPU_SMT_FORCE_DISABLED;
403 	} else {
404 		pr_info("SMT: disabled\n");
405 		cpu_smt_control = CPU_SMT_DISABLED;
406 	}
407 }
408 
409 /*
410  * The decision whether SMT is supported can only be done after the full
411  * CPU identification. Called from architecture code.
412  */
cpu_smt_check_topology(void)413 void __init cpu_smt_check_topology(void)
414 {
415 	if (!topology_smt_supported())
416 		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
417 }
418 
smt_cmdline_disable(char * str)419 static int __init smt_cmdline_disable(char *str)
420 {
421 	cpu_smt_disable(str && !strcmp(str, "force"));
422 	return 0;
423 }
424 early_param("nosmt", smt_cmdline_disable);
425 
cpu_smt_allowed(unsigned int cpu)426 static inline bool cpu_smt_allowed(unsigned int cpu)
427 {
428 	if (cpu_smt_control == CPU_SMT_ENABLED)
429 		return true;
430 
431 	if (topology_is_primary_thread(cpu))
432 		return true;
433 
434 	/*
435 	 * On x86 it's required to boot all logical CPUs at least once so
436 	 * that the init code can get a chance to set CR4.MCE on each
437 	 * CPU. Otherwise, a broadcasted MCE observing CR4.MCE=0b on any
438 	 * core will shutdown the machine.
439 	 */
440 	return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
441 }
442 
443 /* Returns true if SMT is not supported of forcefully (irreversibly) disabled */
cpu_smt_possible(void)444 bool cpu_smt_possible(void)
445 {
446 	return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
447 		cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
448 }
449 EXPORT_SYMBOL_GPL(cpu_smt_possible);
450 #else
/* Without CONFIG_HOTPLUG_SMT every CPU is allowed. */
static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
452 #endif
453 
454 static inline enum cpuhp_state
cpuhp_set_state(struct cpuhp_cpu_state * st,enum cpuhp_state target)455 cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
456 {
457 	enum cpuhp_state prev_state = st->state;
458 
459 	st->rollback = false;
460 	st->last = NULL;
461 
462 	st->target = target;
463 	st->single = false;
464 	st->bringup = st->state < target;
465 
466 	return prev_state;
467 }
468 
469 static inline void
cpuhp_reset_state(struct cpuhp_cpu_state * st,enum cpuhp_state prev_state)470 cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
471 {
472 	st->rollback = true;
473 
474 	/*
475 	 * If we have st->last we need to undo partial multi_instance of this
476 	 * state first. Otherwise start undo at the previous state.
477 	 */
478 	if (!st->last) {
479 		if (st->bringup)
480 			st->state--;
481 		else
482 			st->state++;
483 	}
484 
485 	st->target = prev_state;
486 	st->bringup = !st->bringup;
487 }
488 
/*
 * Regular hotplug invocation of the AP hotplug thread.
 *
 * Wakes st->thread and waits on the completion matching st->bringup. Returns
 * right away when this is not a single-callback request and the state has
 * already reached the target. The outcome is left in st->result.
 */
static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
{
	if (!st->single && st->state == st->target)
		return;

	st->result = 0;
	/*
	 * Make sure the above stores are visible before should_run becomes
	 * true. Paired with the smp_mb() in cpuhp_thread_fun()
	 */
	smp_mb();
	st->should_run = true;
	wake_up_process(st->thread);
	wait_for_ap_thread(st, st->bringup);
}
505 
cpuhp_kick_ap(struct cpuhp_cpu_state * st,enum cpuhp_state target)506 static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
507 {
508 	enum cpuhp_state prev_state;
509 	int ret;
510 
511 	prev_state = cpuhp_set_state(st, target);
512 	__cpuhp_kick_ap(st);
513 	if ((ret = st->result)) {
514 		cpuhp_reset_state(st, prev_state);
515 		__cpuhp_kick_ap(st);
516 	}
517 
518 	return ret;
519 }
520 
bringup_wait_for_ap(unsigned int cpu)521 static int bringup_wait_for_ap(unsigned int cpu)
522 {
523 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
524 
525 	/* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
526 	wait_for_ap_thread(st, true);
527 	if (WARN_ON_ONCE((!cpu_online(cpu))))
528 		return -ECANCELED;
529 
530 	/* Unpark the hotplug thread of the target cpu */
531 	kthread_unpark(st->thread);
532 
533 	/*
534 	 * SMT soft disabling on X86 requires to bring the CPU out of the
535 	 * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit.  The
536 	 * CPU marked itself as booted_once in notify_cpu_starting() so the
537 	 * cpu_smt_allowed() check will now return false if this is not the
538 	 * primary sibling.
539 	 */
540 	if (!cpu_smt_allowed(cpu))
541 		return -ECANCELED;
542 
543 	if (st->target <= CPUHP_AP_ONLINE_IDLE)
544 		return 0;
545 
546 	return cpuhp_kick_ap(st, st->target);
547 }
548 
/* Start @cpu through the architecture code and wait for it to come up. */
static int bringup_cpu(unsigned int cpu)
{
	struct task_struct *idle = idle_thread_get(cpu);
	int err;

	/*
	 * Some architectures walk the irq descriptors to set up the vector
	 * space of the incoming cpu, so irq alloc/free is blocked across
	 * the arch-specific enabling code.
	 */
	irq_lock_sparse();
	err = __cpu_up(cpu, idle);
	irq_unlock_sparse();

	return err ? err : bringup_wait_for_ap(cpu);
}
568 
finish_cpu(unsigned int cpu)569 static int finish_cpu(unsigned int cpu)
570 {
571 	struct task_struct *idle = idle_thread_get(cpu);
572 	struct mm_struct *mm = idle->active_mm;
573 
574 	/*
575 	 * idle_task_exit() will have switched to &init_mm, now
576 	 * clean up any remaining active_mm state.
577 	 */
578 	if (mm != &init_mm)
579 		idle->active_mm = &init_mm;
580 	mmdrop(mm);
581 	return 0;
582 }
583 
584 /*
585  * Hotplug state machine related functions
586  */
587 
undo_cpu_up(unsigned int cpu,struct cpuhp_cpu_state * st)588 static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
589 {
590 	for (st->state--; st->state > st->target; st->state--)
591 		cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
592 }
593 
can_rollback_cpu(struct cpuhp_cpu_state * st)594 static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
595 {
596 	if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
597 		return true;
598 	/*
599 	 * When CPU hotplug is disabled, then taking the CPU down is not
600 	 * possible because takedown_cpu() and the architecture and
601 	 * subsystem specific mechanisms are not available. So the CPU
602 	 * which would be completely unplugged again needs to stay around
603 	 * in the current state.
604 	 */
605 	return st->state <= CPUHP_BRINGUP_CPU;
606 }
607 
cpuhp_up_callbacks(unsigned int cpu,struct cpuhp_cpu_state * st,enum cpuhp_state target)608 static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
609 			      enum cpuhp_state target)
610 {
611 	enum cpuhp_state prev_state = st->state;
612 	int ret = 0;
613 
614 	while (st->state < target) {
615 		st->state++;
616 		ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
617 		if (ret) {
618 			if (can_rollback_cpu(st)) {
619 				st->target = prev_state;
620 				undo_cpu_up(cpu, st);
621 			}
622 			break;
623 		}
624 	}
625 	return ret;
626 }
627 
628 /*
629  * The cpu hotplug threads manage the bringup and teardown of the cpus
630  */
/*
 * .create callback of cpuhp_threads: initialise both completions of @cpu's
 * hotplug state.
 */
static void cpuhp_create(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);

	init_completion(&st->done_up);
	init_completion(&st->done_down);
}
638 
cpuhp_should_run(unsigned int cpu)639 static int cpuhp_should_run(unsigned int cpu)
640 {
641 	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
642 
643 	return st->should_run;
644 }
645 
646 /*
647  * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
648  * callbacks when a state gets [un]installed at runtime.
649  *
650  * Each invocation of this function by the smpboot thread does a single AP
651  * state callback.
652  *
653  * It has 3 modes of operation:
654  *  - single: runs st->cb_state
655  *  - up:     runs ++st->state, while st->state < st->target
656  *  - down:   runs st->state--, while st->state > st->target
657  *
658  * When complete or on error, should_run is cleared and the completion is fired.
659  */
cpuhp_thread_fun(unsigned int cpu)660 static void cpuhp_thread_fun(unsigned int cpu)
661 {
662 	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
663 	bool bringup = st->bringup;
664 	enum cpuhp_state state;
665 
666 	if (WARN_ON_ONCE(!st->should_run))
667 		return;
668 
669 	/*
670 	 * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
671 	 * that if we see ->should_run we also see the rest of the state.
672 	 */
673 	smp_mb();
674 
675 	/*
676 	 * The BP holds the hotplug lock, but we're now running on the AP,
677 	 * ensure that anybody asserting the lock is held, will actually find
678 	 * it so.
679 	 */
680 	lockdep_acquire_cpus_lock();
681 	cpuhp_lock_acquire(bringup);
682 
683 	if (st->single) {
684 		state = st->cb_state;
685 		st->should_run = false;
686 	} else {
687 		if (bringup) {
688 			st->state++;
689 			state = st->state;
690 			st->should_run = (st->state < st->target);
691 			WARN_ON_ONCE(st->state > st->target);
692 		} else {
693 			state = st->state;
694 			st->state--;
695 			st->should_run = (st->state > st->target);
696 			WARN_ON_ONCE(st->state < st->target);
697 		}
698 	}
699 
700 	WARN_ON_ONCE(!cpuhp_is_ap_state(state));
701 
702 	if (cpuhp_is_atomic_state(state)) {
703 		local_irq_disable();
704 		st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
705 		local_irq_enable();
706 
707 		/*
708 		 * STARTING/DYING must not fail!
709 		 */
710 		WARN_ON_ONCE(st->result);
711 	} else {
712 		st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
713 	}
714 
715 	if (st->result) {
716 		/*
717 		 * If we fail on a rollback, we're up a creek without no
718 		 * paddle, no way forward, no way back. We loose, thanks for
719 		 * playing.
720 		 */
721 		WARN_ON_ONCE(st->rollback);
722 		st->should_run = false;
723 	}
724 
725 	cpuhp_lock_release(bringup);
726 	lockdep_release_cpus_lock();
727 
728 	if (!st->should_run)
729 		complete_ap_thread(st, bringup);
730 }
731 
732 /* Invoke a single callback on a remote cpu */
733 static int
cpuhp_invoke_ap_callback(int cpu,enum cpuhp_state state,bool bringup,struct hlist_node * node)734 cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
735 			 struct hlist_node *node)
736 {
737 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
738 	int ret;
739 
740 	if (!cpu_online(cpu))
741 		return 0;
742 
743 	cpuhp_lock_acquire(false);
744 	cpuhp_lock_release(false);
745 
746 	cpuhp_lock_acquire(true);
747 	cpuhp_lock_release(true);
748 
749 	/*
750 	 * If we are up and running, use the hotplug thread. For early calls
751 	 * we invoke the thread function directly.
752 	 */
753 	if (!st->thread)
754 		return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
755 
756 	st->rollback = false;
757 	st->last = NULL;
758 
759 	st->node = node;
760 	st->bringup = bringup;
761 	st->cb_state = state;
762 	st->single = true;
763 
764 	__cpuhp_kick_ap(st);
765 
766 	/*
767 	 * If we failed and did a partial, do a rollback.
768 	 */
769 	if ((ret = st->result) && st->last) {
770 		st->rollback = true;
771 		st->bringup = !bringup;
772 
773 		__cpuhp_kick_ap(st);
774 	}
775 
776 	/*
777 	 * Clean up the leftovers so the next hotplug operation wont use stale
778 	 * data.
779 	 */
780 	st->node = st->last = NULL;
781 	return ret;
782 }
783 
/*
 * Kick the hotplug thread of @cpu towards the already configured st->target
 * and trace the operation. Returns the result of cpuhp_kick_ap().
 */
static int cpuhp_kick_ap_work(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	enum cpuhp_state prev_state = st->state;
	int ret;

	/*
	 * Both direction maps are acquired and released right away.
	 * NOTE(review): presumably this only records the dependency for
	 * lockdep - confirm.
	 */
	cpuhp_lock_acquire(false);
	cpuhp_lock_release(false);

	cpuhp_lock_acquire(true);
	cpuhp_lock_release(true);

	trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
	ret = cpuhp_kick_ap(st, st->target);
	trace_cpuhp_exit(cpu, st->state, prev_state, ret);

	return ret;
}
802 
803 static struct smp_hotplug_thread cpuhp_threads = {
804 	.store			= &cpuhp_state.thread,
805 	.create			= &cpuhp_create,
806 	.thread_should_run	= cpuhp_should_run,
807 	.thread_fn		= cpuhp_thread_fun,
808 	.thread_comm		= "cpuhp/%u",
809 	.selfparking		= true,
810 };
811 
cpuhp_threads_init(void)812 void __init cpuhp_threads_init(void)
813 {
814 	BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
815 	kthread_unpark(this_cpu_read(cpuhp_state.thread));
816 }
817 
818 /*
819  *
820  * Serialize hotplug trainwrecks outside of the cpu_hotplug_lock
821  * protected region.
822  *
823  * The operation is still serialized against concurrent CPU hotplug via
824  * cpu_add_remove_lock, i.e. CPU map protection.  But it is _not_
825  * serialized against other hotplug related activity like adding or
826  * removing of state callbacks and state instances, which invoke either the
827  * startup or the teardown callback of the affected state.
828  *
829  * This is required for subsystems which are unfixable vs. CPU hotplug and
830  * evade lock inversion problems by scheduling work which has to be
831  * completed _before_ cpu_up()/_cpu_down() returns.
832  *
833  * Don't even think about adding anything to this for any new code or even
 * drivers. Its only purpose is to keep existing lock order trainwrecks
835  * working.
836  *
837  * For cpu_down() there might be valid reasons to finish cleanups which are
838  * not required to be done under cpu_hotplug_lock, but that's a different
839  * story and would be not invoked via this.
840  */
static void cpu_up_down_serialize_trainwrecks(bool tasks_frozen)
{
	/*
	 * With frozen tasks (suspend, hibernate) waiting for the cpuset
	 * worker would wait forever, so there is nothing to do.
	 */
	if (tasks_frozen)
		return;

	/*
	 * cpusets hand their hotplug work to a worker to "solve" lock order
	 * problems. Wait for that worker here.
	 *
	 * Without the wait the hotplug operation returns with inconsistent
	 * state, which is even observable from user space when a CPU is
	 * brought up: the CPU plug uevent gets delivered, but user space
	 * reacting to it cannot move tasks to the new CPU until the work has
	 * finished, because until then the CPU is not assignable in
	 * cpusets/cgroups. On unplug this is not necessarily visible, but the
	 * state is inconsistent all the same, and that is the real problem
	 * which needs to be "fixed". The transient state between scheduling
	 * the work and returning from the wait cannot be prevented this way.
	 */
	cpuset_wait_for_hotplug();
}
863 
864 #ifdef CONFIG_HOTPLUG_CPU
/*
 * Default, used unless something has already defined
 * arch_clear_mm_cpumask_cpu: clear @cpu in the cpumask of @mm.
 */
#ifndef arch_clear_mm_cpumask_cpu
#define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm))
#endif
868 
869 /**
870  * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
871  * @cpu: a CPU id
872  *
873  * This function walks all processes, finds a valid mm struct for each one and
874  * then clears a corresponding bit in mm's cpumask.  While this all sounds
875  * trivial, there are various non-obvious corner cases, which this function
876  * tries to solve in a safe manner.
877  *
878  * Also note that the function uses a somewhat relaxed locking scheme, so it may
879  * be called only for an already offlined CPU.
880  */
clear_tasks_mm_cpumask(int cpu)881 void clear_tasks_mm_cpumask(int cpu)
882 {
883 	struct task_struct *p;
884 
885 	/*
886 	 * This function is called after the cpu is taken down and marked
887 	 * offline, so its not like new tasks will ever get this cpu set in
888 	 * their mm mask. -- Peter Zijlstra
889 	 * Thus, we may use rcu_read_lock() here, instead of grabbing
890 	 * full-fledged tasklist_lock.
891 	 */
892 	WARN_ON(cpu_online(cpu));
893 	rcu_read_lock();
894 	for_each_process(p) {
895 		struct task_struct *t;
896 
897 		/*
898 		 * Main thread might exit, but other threads may still have
899 		 * a valid mm. Find one.
900 		 */
901 		t = find_lock_task_mm(p);
902 		if (!t)
903 			continue;
904 		arch_clear_mm_cpumask_cpu(cpu, t->mm);
905 		task_unlock(t);
906 	}
907 	rcu_read_unlock();
908 }
909 
910 /* Take this CPU down. */
take_cpu_down(void * _param)911 static int take_cpu_down(void *_param)
912 {
913 	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
914 	enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
915 	int err, cpu = smp_processor_id();
916 	int ret;
917 
918 	/* Ensure this CPU doesn't handle any more interrupts. */
919 	err = __cpu_disable();
920 	if (err < 0)
921 		return err;
922 
923 	/*
924 	 * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
925 	 * do this step again.
926 	 */
927 	WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
928 	st->state--;
929 	/* Invoke the former CPU_DYING callbacks */
930 	for (; st->state > target; st->state--) {
931 		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
932 		/*
933 		 * DYING must not fail!
934 		 */
935 		WARN_ON_ONCE(ret);
936 	}
937 
938 	/* Give up timekeeping duties */
939 	tick_handover_do_timer();
940 	/* Remove CPU from timer broadcasting */
941 	tick_offline_cpu(cpu);
942 	/* Park the stopper thread */
943 	stop_machine_park(cpu);
944 	return 0;
945 }
946 
takedown_cpu(unsigned int cpu)947 static int takedown_cpu(unsigned int cpu)
948 {
949 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
950 	int err;
951 
952 	/* Park the smpboot threads */
953 	kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
954 
955 	/*
956 	 * Prevent irq alloc/free while the dying cpu reorganizes the
957 	 * interrupt affinities.
958 	 */
959 	irq_lock_sparse();
960 
961 	/*
962 	 * So now all preempt/rcu users must observe !cpu_active().
963 	 */
964 	err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
965 	if (err) {
966 		/* CPU refused to die */
967 		irq_unlock_sparse();
968 		/* Unpark the hotplug thread so we can rollback there */
969 		kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
970 		return err;
971 	}
972 	BUG_ON(cpu_online(cpu));
973 
974 	/*
975 	 * The teardown callback for CPUHP_AP_SCHED_STARTING will have removed
976 	 * all runnable tasks from the CPU, there's only the idle task left now
977 	 * that the migration thread is done doing the stop_machine thing.
978 	 *
979 	 * Wait for the stop thread to go away.
980 	 */
981 	wait_for_ap_thread(st, false);
982 	BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
983 
984 	/* Interrupts are moved away from the dying cpu, reenable alloc/free */
985 	irq_unlock_sparse();
986 
987 	hotplug_cpu__broadcast_tick_pull(cpu);
988 	/* This actually kills the CPU. */
989 	__cpu_die(cpu);
990 
991 	tick_cleanup_dead_cpu(cpu);
992 	rcutree_migrate_callbacks(cpu);
993 	return 0;
994 }
995 
cpuhp_complete_idle_dead(void * arg)996 static void cpuhp_complete_idle_dead(void *arg)
997 {
998 	struct cpuhp_cpu_state *st = arg;
999 
1000 	complete_ap_thread(st, false);
1001 }
1002 
cpuhp_report_idle_dead(void)1003 void cpuhp_report_idle_dead(void)
1004 {
1005 	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1006 
1007 	BUG_ON(st->state != CPUHP_AP_OFFLINE);
1008 	rcu_report_dead(smp_processor_id());
1009 	st->state = CPUHP_AP_IDLE_DEAD;
1010 	/*
1011 	 * We cannot call complete after rcu_report_dead() so we delegate it
1012 	 * to an online cpu.
1013 	 */
1014 	smp_call_function_single(cpumask_first(cpu_online_mask),
1015 				 cpuhp_complete_idle_dead, st, 0);
1016 }
1017 
undo_cpu_down(unsigned int cpu,struct cpuhp_cpu_state * st)1018 static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
1019 {
1020 	for (st->state++; st->state < st->target; st->state++)
1021 		cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
1022 }
1023 
cpuhp_down_callbacks(unsigned int cpu,struct cpuhp_cpu_state * st,enum cpuhp_state target)1024 static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
1025 				enum cpuhp_state target)
1026 {
1027 	enum cpuhp_state prev_state = st->state;
1028 	int ret = 0;
1029 
1030 	for (; st->state > target; st->state--) {
1031 		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
1032 		if (ret) {
1033 			st->target = prev_state;
1034 			if (st->state < prev_state)
1035 				undo_cpu_down(cpu, st);
1036 			break;
1037 		}
1038 	}
1039 	return ret;
1040 }
1041 
1042 /* Requires cpu_add_remove_lock to be held */
_cpu_down(unsigned int cpu,int tasks_frozen,enum cpuhp_state target)1043 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
1044 			   enum cpuhp_state target)
1045 {
1046 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1047 	int prev_state, ret = 0;
1048 
1049 	if (num_online_cpus() == 1)
1050 		return -EBUSY;
1051 
1052 	if (!cpu_present(cpu))
1053 		return -EINVAL;
1054 
1055 #ifdef CONFIG_CPU_ISOLATION_OPT
1056 	if (!tasks_frozen && !cpu_isolated(cpu) && num_online_uniso_cpus() == 1)
1057 		return -EBUSY;
1058 #endif
1059 
1060 	cpus_write_lock();
1061 
1062 	cpuhp_tasks_frozen = tasks_frozen;
1063 
1064 	prev_state = cpuhp_set_state(st, target);
1065 	/*
1066 	 * If the current CPU state is in the range of the AP hotplug thread,
1067 	 * then we need to kick the thread.
1068 	 */
1069 	if (st->state > CPUHP_TEARDOWN_CPU) {
1070 		st->target = max((int)target, CPUHP_TEARDOWN_CPU);
1071 		ret = cpuhp_kick_ap_work(cpu);
1072 		/*
1073 		 * The AP side has done the error rollback already. Just
1074 		 * return the error code..
1075 		 */
1076 		if (ret)
1077 			goto out;
1078 
1079 		/*
1080 		 * We might have stopped still in the range of the AP hotplug
1081 		 * thread. Nothing to do anymore.
1082 		 */
1083 		if (st->state > CPUHP_TEARDOWN_CPU)
1084 			goto out;
1085 
1086 		st->target = target;
1087 	}
1088 	/*
1089 	 * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
1090 	 * to do the further cleanups.
1091 	 */
1092 	ret = cpuhp_down_callbacks(cpu, st, target);
1093 	if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) {
1094 		cpuhp_reset_state(st, prev_state);
1095 		__cpuhp_kick_ap(st);
1096 	}
1097 
1098 out:
1099 	cpus_write_unlock();
1100 	/*
1101 	 * Do post unplug cleanup. This is still protected against
1102 	 * concurrent CPU hotplug via cpu_add_remove_lock.
1103 	 */
1104 	lockup_detector_cleanup();
1105 	arch_smt_update();
1106 	cpu_up_down_serialize_trainwrecks(tasks_frozen);
1107 	return ret;
1108 }
1109 
cpu_down_maps_locked(unsigned int cpu,enum cpuhp_state target)1110 static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
1111 {
1112 	if (cpu_hotplug_disabled)
1113 		return -EBUSY;
1114 	return _cpu_down(cpu, 0, target);
1115 }
1116 
/*
 * Locked wrapper around cpu_down_maps_locked(): brackets the call with
 * cpu_maps_update_begin()/cpu_maps_update_done() and returns its result.
 */
static int cpu_down(unsigned int cpu, enum cpuhp_state target)
{
	int ret;

	cpu_maps_update_begin();
	ret = cpu_down_maps_locked(cpu, target);
	cpu_maps_update_done();

	return ret;
}
1126 
1127 /**
1128  * cpu_device_down - Bring down a cpu device
1129  * @dev: Pointer to the cpu device to offline
1130  *
1131  * This function is meant to be used by device core cpu subsystem only.
1132  *
1133  * Other subsystems should use remove_cpu() instead.
1134  */
cpu_device_down(struct device * dev)1135 int cpu_device_down(struct device *dev)
1136 {
1137 	return cpu_down(dev->id, CPUHP_OFFLINE);
1138 }
1139 
/*
 * Offline @cpu through the device core: looks up the CPU's device and calls
 * device_offline() on it while holding the device hotplug lock.
 *
 * Returns whatever device_offline() returns.
 */
int remove_cpu(unsigned int cpu)
{
	struct device *dev;
	int err;

	lock_device_hotplug();
	dev = get_cpu_device(cpu);
	err = device_offline(dev);
	unlock_device_hotplug();

	return err;
}
EXPORT_SYMBOL_GPL(remove_cpu);
1151 
smp_shutdown_nonboot_cpus(unsigned int primary_cpu)1152 void smp_shutdown_nonboot_cpus(unsigned int primary_cpu)
1153 {
1154 	unsigned int cpu;
1155 	int error;
1156 
1157 	cpu_maps_update_begin();
1158 
1159 	/*
1160 	 * Make certain the cpu I'm about to reboot on is online.
1161 	 *
1162 	 * This is inline to what migrate_to_reboot_cpu() already do.
1163 	 */
1164 	if (!cpu_online(primary_cpu))
1165 		primary_cpu = cpumask_first(cpu_online_mask);
1166 
1167 	for_each_online_cpu(cpu) {
1168 		if (cpu == primary_cpu)
1169 			continue;
1170 
1171 		error = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
1172 		if (error) {
1173 			pr_err("Failed to offline CPU%d - error=%d",
1174 				cpu, error);
1175 			break;
1176 		}
1177 	}
1178 
1179 	/*
1180 	 * Ensure all but the reboot CPU are offline.
1181 	 */
1182 	BUG_ON(num_online_cpus() > 1);
1183 
1184 	/*
1185 	 * Make sure the CPUs won't be enabled by someone else after this
1186 	 * point. Kexec will reboot to a new kernel shortly resetting
1187 	 * everything along the way.
1188 	 */
1189 	cpu_hotplug_disabled++;
1190 
1191 	cpu_maps_update_done();
1192 }
1193 
1194 #else
/* No CPU hotplug: the cpu:teardown step gets no teardown callback. */
#define takedown_cpu		NULL
1196 #endif /*CONFIG_HOTPLUG_CPU*/
1197 
1198 /**
1199  * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
1200  * @cpu: cpu that just started
1201  *
1202  * It must be called by the arch code on the new cpu, before the new cpu
1203  * enables interrupts and before the "boot" cpu returns from __cpu_up().
1204  */
notify_cpu_starting(unsigned int cpu)1205 void notify_cpu_starting(unsigned int cpu)
1206 {
1207 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1208 	enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
1209 	int ret;
1210 
1211 	rcu_cpu_starting(cpu);	/* Enables RCU usage on this CPU. */
1212 	cpumask_set_cpu(cpu, &cpus_booted_once_mask);
1213 	while (st->state < target) {
1214 		st->state++;
1215 		ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
1216 		/*
1217 		 * STARTING must not fail!
1218 		 */
1219 		WARN_ON_ONCE(ret);
1220 	}
1221 }
1222 
1223 /*
1224  * Called from the idle task. Wake up the controlling task which brings the
1225  * hotplug thread of the upcoming CPU up and then delegates the rest of the
1226  * online bringup to the hotplug thread.
1227  */
cpuhp_online_idle(enum cpuhp_state state)1228 void cpuhp_online_idle(enum cpuhp_state state)
1229 {
1230 	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1231 
1232 	/* Happens for the boot cpu */
1233 	if (state != CPUHP_AP_ONLINE_IDLE)
1234 		return;
1235 
1236 	/*
1237 	 * Unpart the stopper thread before we start the idle loop (and start
1238 	 * scheduling); this ensures the stopper task is always available.
1239 	 */
1240 	stop_machine_unpark(smp_processor_id());
1241 
1242 	st->state = CPUHP_AP_ONLINE_IDLE;
1243 	complete_ap_thread(st, true);
1244 }
1245 
1246 /* Requires cpu_add_remove_lock to be held */
_cpu_up(unsigned int cpu,int tasks_frozen,enum cpuhp_state target)1247 static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
1248 {
1249 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1250 	struct task_struct *idle;
1251 	int ret = 0;
1252 
1253 	cpus_write_lock();
1254 
1255 	if (!cpu_present(cpu)) {
1256 		ret = -EINVAL;
1257 		goto out;
1258 	}
1259 
1260 	/*
1261 	 * The caller of cpu_up() might have raced with another
1262 	 * caller. Nothing to do.
1263 	 */
1264 	if (st->state >= target)
1265 		goto out;
1266 
1267 	if (st->state == CPUHP_OFFLINE) {
1268 		/* Let it fail before we try to bring the cpu up */
1269 		idle = idle_thread_get(cpu);
1270 		if (IS_ERR(idle)) {
1271 			ret = PTR_ERR(idle);
1272 			goto out;
1273 		}
1274 	}
1275 
1276 	cpuhp_tasks_frozen = tasks_frozen;
1277 
1278 	cpuhp_set_state(st, target);
1279 	/*
1280 	 * If the current CPU state is in the range of the AP hotplug thread,
1281 	 * then we need to kick the thread once more.
1282 	 */
1283 	if (st->state > CPUHP_BRINGUP_CPU) {
1284 		ret = cpuhp_kick_ap_work(cpu);
1285 		/*
1286 		 * The AP side has done the error rollback already. Just
1287 		 * return the error code..
1288 		 */
1289 		if (ret)
1290 			goto out;
1291 	}
1292 
1293 	/*
1294 	 * Try to reach the target state. We max out on the BP at
1295 	 * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
1296 	 * responsible for bringing it up to the target state.
1297 	 */
1298 	target = min((int)target, CPUHP_BRINGUP_CPU);
1299 	ret = cpuhp_up_callbacks(cpu, st, target);
1300 out:
1301 	cpus_write_unlock();
1302 	arch_smt_update();
1303 	cpu_up_down_serialize_trainwrecks(tasks_frozen);
1304 	return ret;
1305 }
1306 
cpu_up(unsigned int cpu,enum cpuhp_state target)1307 static int cpu_up(unsigned int cpu, enum cpuhp_state target)
1308 {
1309 	int err = 0;
1310 
1311 	if (!cpu_possible(cpu)) {
1312 		pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
1313 		       cpu);
1314 #if defined(CONFIG_IA64)
1315 		pr_err("please check additional_cpus= boot parameter\n");
1316 #endif
1317 		return -EINVAL;
1318 	}
1319 
1320 	err = try_online_node(cpu_to_node(cpu));
1321 	if (err)
1322 		return err;
1323 
1324 	cpu_maps_update_begin();
1325 
1326 	if (cpu_hotplug_disabled) {
1327 		err = -EBUSY;
1328 		goto out;
1329 	}
1330 	if (!cpu_smt_allowed(cpu)) {
1331 		err = -EPERM;
1332 		goto out;
1333 	}
1334 
1335 	err = _cpu_up(cpu, 0, target);
1336 out:
1337 	cpu_maps_update_done();
1338 	return err;
1339 }
1340 
1341 /**
1342  * cpu_device_up - Bring up a cpu device
1343  * @dev: Pointer to the cpu device to online
1344  *
1345  * This function is meant to be used by device core cpu subsystem only.
1346  *
1347  * Other subsystems should use add_cpu() instead.
1348  */
cpu_device_up(struct device * dev)1349 int cpu_device_up(struct device *dev)
1350 {
1351 	return cpu_up(dev->id, CPUHP_ONLINE);
1352 }
1353 
/*
 * Online @cpu through the device core: looks up the CPU's device and calls
 * device_online() on it while holding the device hotplug lock.
 *
 * Returns whatever device_online() returns.
 */
int add_cpu(unsigned int cpu)
{
	struct device *dev;
	int err;

	lock_device_hotplug();
	dev = get_cpu_device(cpu);
	err = device_online(dev);
	unlock_device_hotplug();

	return err;
}
EXPORT_SYMBOL_GPL(add_cpu);
1365 
1366 /**
1367  * bringup_hibernate_cpu - Bring up the CPU that we hibernated on
1368  * @sleep_cpu: The cpu we hibernated on and should be brought up.
1369  *
1370  * On some architectures like arm64, we can hibernate on any CPU, but on
1371  * wake up the CPU we hibernated on might be offline as a side effect of
1372  * using maxcpus= for example.
1373  */
bringup_hibernate_cpu(unsigned int sleep_cpu)1374 int bringup_hibernate_cpu(unsigned int sleep_cpu)
1375 {
1376 	int ret;
1377 
1378 	if (!cpu_online(sleep_cpu)) {
1379 		pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n");
1380 		ret = cpu_up(sleep_cpu, CPUHP_ONLINE);
1381 		if (ret) {
1382 			pr_err("Failed to bring hibernate-CPU up!\n");
1383 			return ret;
1384 		}
1385 	}
1386 	return 0;
1387 }
1388 
bringup_nonboot_cpus(unsigned int setup_max_cpus)1389 void bringup_nonboot_cpus(unsigned int setup_max_cpus)
1390 {
1391 	unsigned int cpu;
1392 
1393 	for_each_present_cpu(cpu) {
1394 		if (num_online_cpus() >= setup_max_cpus)
1395 			break;
1396 		if (!cpu_online(cpu))
1397 			cpu_up(cpu, CPUHP_ONLINE);
1398 	}
1399 }
1400 
1401 #ifdef CONFIG_PM_SLEEP_SMP
/*
 * CPUs that freeze_secondary_cpus() took down successfully and that
 * thaw_secondary_cpus() brings back up. Allocated by alloc_frozen_cpus().
 */
static cpumask_var_t frozen_cpus;
1403 
freeze_secondary_cpus(int primary)1404 int freeze_secondary_cpus(int primary)
1405 {
1406 	int cpu, error = 0;
1407 
1408 	cpu_maps_update_begin();
1409 	if (primary == -1) {
1410 		primary = cpumask_first(cpu_online_mask);
1411 		if (!housekeeping_cpu(primary, HK_FLAG_TIMER))
1412 			primary = housekeeping_any_cpu(HK_FLAG_TIMER);
1413 	} else {
1414 		if (!cpu_online(primary))
1415 			primary = cpumask_first(cpu_online_mask);
1416 	}
1417 
1418 	/*
1419 	 * We take down all of the non-boot CPUs in one shot to avoid races
1420 	 * with the userspace trying to use the CPU hotplug at the same time
1421 	 */
1422 	cpumask_clear(frozen_cpus);
1423 
1424 	pr_info("Disabling non-boot CPUs ...\n");
1425 	for_each_online_cpu(cpu) {
1426 		if (cpu == primary)
1427 			continue;
1428 
1429 		if (pm_wakeup_pending()) {
1430 			pr_info("Wakeup pending. Abort CPU freeze\n");
1431 			error = -EBUSY;
1432 			break;
1433 		}
1434 
1435 		trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
1436 		error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
1437 		trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
1438 		if (!error)
1439 			cpumask_set_cpu(cpu, frozen_cpus);
1440 		else {
1441 			pr_err("Error taking CPU%d down: %d\n", cpu, error);
1442 			break;
1443 		}
1444 	}
1445 
1446 	if (!error)
1447 		BUG_ON(num_online_cpus() > 1);
1448 	else
1449 		pr_err("Non-boot CPUs are not disabled\n");
1450 
1451 	/*
1452 	 * Make sure the CPUs won't be enabled by someone else. We need to do
1453 	 * this even in case of failure as all freeze_secondary_cpus() users are
1454 	 * supposed to do thaw_secondary_cpus() on the failure path.
1455 	 */
1456 	cpu_hotplug_disabled++;
1457 
1458 	cpu_maps_update_done();
1459 	return error;
1460 }
1461 
/*
 * Empty default hook that thaw_secondary_cpus() calls right before it starts
 * bringing the frozen CPUs back up. Declared __weak so that another
 * definition can replace it at link time.
 */
void __weak arch_thaw_secondary_cpus_begin(void)
{
}
1465 
/*
 * Empty default hook that thaw_secondary_cpus() calls once it has gone
 * through all frozen CPUs. Declared __weak so that another definition can
 * replace it at link time.
 */
void __weak arch_thaw_secondary_cpus_end(void)
{
}
1469 
thaw_secondary_cpus(void)1470 void thaw_secondary_cpus(void)
1471 {
1472 	int cpu, error;
1473 
1474 	/* Allow everyone to use the CPU hotplug again */
1475 	cpu_maps_update_begin();
1476 	__cpu_hotplug_enable();
1477 	if (cpumask_empty(frozen_cpus))
1478 		goto out;
1479 
1480 	pr_info("Enabling non-boot CPUs ...\n");
1481 
1482 	arch_thaw_secondary_cpus_begin();
1483 
1484 	for_each_cpu(cpu, frozen_cpus) {
1485 		trace_suspend_resume(TPS("CPU_ON"), cpu, true);
1486 		error = _cpu_up(cpu, 1, CPUHP_ONLINE);
1487 		trace_suspend_resume(TPS("CPU_ON"), cpu, false);
1488 		if (!error) {
1489 			pr_info("CPU%d is up\n", cpu);
1490 			continue;
1491 		}
1492 		pr_warn("Error taking CPU%d up: %d\n", cpu, error);
1493 	}
1494 
1495 	arch_thaw_secondary_cpus_end();
1496 
1497 	cpumask_clear(frozen_cpus);
1498 out:
1499 	cpu_maps_update_done();
1500 }
1501 
alloc_frozen_cpus(void)1502 static int __init alloc_frozen_cpus(void)
1503 {
1504 	if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
1505 		return -ENOMEM;
1506 	return 0;
1507 }
1508 core_initcall(alloc_frozen_cpus);
1509 
1510 /*
1511  * When callbacks for CPU hotplug notifications are being executed, we must
1512  * ensure that the state of the system with respect to the tasks being frozen
1513  * or not, as reported by the notification, remains unchanged *throughout the
1514  * duration* of the execution of the callbacks.
1515  * Hence we need to prevent the freezer from racing with regular CPU hotplug.
1516  *
1517  * This synchronization is implemented by mutually excluding regular CPU
1518  * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
1519  * Hibernate notifications.
1520  */
1521 static int
cpu_hotplug_pm_callback(struct notifier_block * nb,unsigned long action,void * ptr)1522 cpu_hotplug_pm_callback(struct notifier_block *nb,
1523 			unsigned long action, void *ptr)
1524 {
1525 	switch (action) {
1526 
1527 	case PM_SUSPEND_PREPARE:
1528 	case PM_HIBERNATION_PREPARE:
1529 		cpu_hotplug_disable();
1530 		break;
1531 
1532 	case PM_POST_SUSPEND:
1533 	case PM_POST_HIBERNATION:
1534 		cpu_hotplug_enable();
1535 		break;
1536 
1537 	default:
1538 		return NOTIFY_DONE;
1539 	}
1540 
1541 	return NOTIFY_OK;
1542 }
1543 
1544 
/*
 * core_initcall: hook cpu_hotplug_pm_callback() up via pm_notifier().
 * Always returns 0.
 */
static int __init cpu_hotplug_pm_sync_init(void)
{
	/*
	 * cpu_hotplug_pm_callback has higher priority than x86
	 * bsp_pm_callback which depends on cpu_hotplug_pm_callback
	 * to disable cpu hotplug to avoid cpu hotplug race.
	 */
	pm_notifier(cpu_hotplug_pm_callback, 0);
	return 0;
}
core_initcall(cpu_hotplug_pm_sync_init);
1556 
1557 #endif /* CONFIG_PM_SLEEP_SMP */
1558 
1559 int __boot_cpu_id;
1560 
1561 #endif /* CONFIG_SMP */
1562 
/*
 * Boot processor state steps
 *
 * Statically registered hotplug steps, indexed by enum cpuhp_state. Each
 * entry names its step and may supply a startup and/or teardown callback.
 * Slots not listed here have no name, which is what cpuhp_reserve_state()
 * looks for when it hands out a dynamic state.
 */
static struct cpuhp_step cpuhp_hp_states[] = {
	[CPUHP_OFFLINE] = {
		.name			= "offline",
		.startup.single		= NULL,
		.teardown.single	= NULL,
	},
#ifdef CONFIG_SMP
	[CPUHP_CREATE_THREADS]= {
		.name			= "threads:prepare",
		.startup.single		= smpboot_create_threads,
		.teardown.single	= NULL,
		.cant_stop		= true,
	},
	[CPUHP_PERF_PREPARE] = {
		.name			= "perf:prepare",
		.startup.single		= perf_event_init_cpu,
		.teardown.single	= perf_event_exit_cpu,
	},
	[CPUHP_WORKQUEUE_PREP] = {
		.name			= "workqueue:prepare",
		.startup.single		= workqueue_prepare_cpu,
		.teardown.single	= NULL,
	},
	[CPUHP_HRTIMERS_PREPARE] = {
		.name			= "hrtimers:prepare",
		.startup.single		= hrtimers_prepare_cpu,
		.teardown.single	= hrtimers_dead_cpu,
	},
	[CPUHP_SMPCFD_PREPARE] = {
		.name			= "smpcfd:prepare",
		.startup.single		= smpcfd_prepare_cpu,
		.teardown.single	= smpcfd_dead_cpu,
	},
	[CPUHP_RELAY_PREPARE] = {
		.name			= "relay:prepare",
		.startup.single		= relay_prepare_cpu,
		.teardown.single	= NULL,
	},
	[CPUHP_SLAB_PREPARE] = {
		.name			= "slab:prepare",
		.startup.single		= slab_prepare_cpu,
		.teardown.single	= slab_dead_cpu,
	},
	[CPUHP_RCUTREE_PREP] = {
		.name			= "RCU/tree:prepare",
		.startup.single		= rcutree_prepare_cpu,
		.teardown.single	= rcutree_dead_cpu,
	},
	/*
	 * On the tear-down path, timers_dead_cpu() must be invoked
	 * before blk_mq_queue_reinit_notify() from notify_dead(),
	 * otherwise a RCU stall occurs.
	 */
	[CPUHP_TIMERS_PREPARE] = {
		.name			= "timers:prepare",
		.startup.single		= timers_prepare_cpu,
		.teardown.single	= timers_dead_cpu,
	},
	/* Kicks the plugged cpu into life */
	[CPUHP_BRINGUP_CPU] = {
		.name			= "cpu:bringup",
		.startup.single		= bringup_cpu,
		.teardown.single	= finish_cpu,
		.cant_stop		= true,
	},
	/* Final state before CPU kills itself */
	[CPUHP_AP_IDLE_DEAD] = {
		.name			= "idle:dead",
	},
	/*
	 * Last state before CPU enters the idle loop to die. Transient state
	 * for synchronization.
	 */
	[CPUHP_AP_OFFLINE] = {
		.name			= "ap:offline",
		.cant_stop		= true,
	},
	/* First state is scheduler control. Interrupts are disabled */
	[CPUHP_AP_SCHED_STARTING] = {
		.name			= "sched:starting",
		.startup.single		= sched_cpu_starting,
		.teardown.single	= sched_cpu_dying,
	},
	[CPUHP_AP_RCUTREE_DYING] = {
		.name			= "RCU/tree:dying",
		.startup.single		= NULL,
		.teardown.single	= rcutree_dying_cpu,
	},
	[CPUHP_AP_SMPCFD_DYING] = {
		.name			= "smpcfd:dying",
		.startup.single		= NULL,
		.teardown.single	= smpcfd_dying_cpu,
	},
	/*
	 * Entry state on starting. Interrupts enabled from here on. Transient
	 * state for synchronization.
	 */
	[CPUHP_AP_ONLINE] = {
		.name			= "ap:online",
	},
	/*
	 * Handled on control processor until the plugged processor manages
	 * this itself.
	 */
	[CPUHP_TEARDOWN_CPU] = {
		.name			= "cpu:teardown",
		.startup.single		= NULL,
		.teardown.single	= takedown_cpu,
		.cant_stop		= true,
	},
	/* Handle smpboot threads park/unpark */
	[CPUHP_AP_SMPBOOT_THREADS] = {
		.name			= "smpboot/threads:online",
		.startup.single		= smpboot_unpark_threads,
		.teardown.single	= smpboot_park_threads,
	},
	[CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
		.name			= "irq/affinity:online",
		.startup.single		= irq_affinity_online_cpu,
		.teardown.single	= NULL,
	},
	[CPUHP_AP_PERF_ONLINE] = {
		.name			= "perf:online",
		.startup.single		= perf_event_init_cpu,
		.teardown.single	= perf_event_exit_cpu,
	},
	[CPUHP_AP_WATCHDOG_ONLINE] = {
		.name			= "lockup_detector:online",
		.startup.single		= lockup_detector_online_cpu,
		.teardown.single	= lockup_detector_offline_cpu,
	},
	[CPUHP_AP_WORKQUEUE_ONLINE] = {
		.name			= "workqueue:online",
		.startup.single		= workqueue_online_cpu,
		.teardown.single	= workqueue_offline_cpu,
	},
	[CPUHP_AP_RCUTREE_ONLINE] = {
		.name			= "RCU/tree:online",
		.startup.single		= rcutree_online_cpu,
		.teardown.single	= rcutree_offline_cpu,
	},
#endif
	/*
	 * The dynamically registered state space is here
	 */

#ifdef CONFIG_SMP
	/* Last state is scheduler control setting the cpu active */
	[CPUHP_AP_ACTIVE] = {
		.name			= "sched:active",
		.startup.single		= sched_cpu_activate,
		.teardown.single	= sched_cpu_deactivate,
	},
#endif

	/* CPU is fully up and running. */
	[CPUHP_ONLINE] = {
		.name			= "online",
		.startup.single		= NULL,
		.teardown.single	= NULL,
	},
};
1724 
1725 /* Sanity check for callbacks */
cpuhp_cb_check(enum cpuhp_state state)1726 static int cpuhp_cb_check(enum cpuhp_state state)
1727 {
1728 	if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
1729 		return -EINVAL;
1730 	return 0;
1731 }
1732 
1733 /*
1734  * Returns a free for dynamic slot assignment of the Online state. The states
1735  * are protected by the cpuhp_slot_states mutex and an empty slot is identified
1736  * by having no name assigned.
1737  */
cpuhp_reserve_state(enum cpuhp_state state)1738 static int cpuhp_reserve_state(enum cpuhp_state state)
1739 {
1740 	enum cpuhp_state i, end;
1741 	struct cpuhp_step *step;
1742 
1743 	switch (state) {
1744 	case CPUHP_AP_ONLINE_DYN:
1745 		step = cpuhp_hp_states + CPUHP_AP_ONLINE_DYN;
1746 		end = CPUHP_AP_ONLINE_DYN_END;
1747 		break;
1748 	case CPUHP_BP_PREPARE_DYN:
1749 		step = cpuhp_hp_states + CPUHP_BP_PREPARE_DYN;
1750 		end = CPUHP_BP_PREPARE_DYN_END;
1751 		break;
1752 	default:
1753 		return -EINVAL;
1754 	}
1755 
1756 	for (i = state; i <= end; i++, step++) {
1757 		if (!step->name)
1758 			return i;
1759 	}
1760 	WARN(1, "No more dynamic states available for CPU hotplug\n");
1761 	return -ENOSPC;
1762 }
1763 
/*
 * (Un)Install the callbacks of @state for further cpu hotplug operations.
 *
 * A non-NULL @name installs, a NULL @name removes the state.
 *
 * Returns a negative error from cpuhp_reserve_state(), -EBUSY when
 * installing into a step that already has a name, the allocated state
 * number when a dynamic slot was reserved, and 0 otherwise.
 */
static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
				 int (*startup)(unsigned int cpu),
				 int (*teardown)(unsigned int cpu),
				 bool multi_instance)
{
	bool dyn_base = state == CPUHP_AP_ONLINE_DYN ||
			state == CPUHP_BP_PREPARE_DYN;
	struct cpuhp_step *step;
	int ret = 0;

	/*
	 * Only installation may allocate from a dynamic range.
	 *
	 * CPUHP_AP_ONLINE_DYN and CPUHP_BP_PREPARE_DYN are themselves the
	 * first slots handed out of their ranges. Allocating on removal
	 * would therefore pick a new, already empty slot and clear that one,
	 * leaving the callbacks of the state that was meant to be removed
	 * dangling and wrecking the next hotplug operation.
	 */
	if (name && dyn_base) {
		ret = cpuhp_reserve_state(state);
		if (ret < 0)
			return ret;
		state = ret;
	}

	step = cpuhp_get_step(state);
	if (name && step->name)
		return -EBUSY;

	step->name = name;
	step->startup.single = startup;
	step->teardown.single = teardown;
	step->multi_instance = multi_instance;
	INIT_HLIST_HEAD(&step->list);

	return ret;
}
1800 
cpuhp_get_teardown_cb(enum cpuhp_state state)1801 static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
1802 {
1803 	return cpuhp_get_step(state)->teardown.single;
1804 }
1805 
1806 /*
1807  * Call the startup/teardown function for a step either on the AP or
1808  * on the current CPU.
1809  */
cpuhp_issue_call(int cpu,enum cpuhp_state state,bool bringup,struct hlist_node * node)1810 static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
1811 			    struct hlist_node *node)
1812 {
1813 	struct cpuhp_step *sp = cpuhp_get_step(state);
1814 	int ret;
1815 
1816 	/*
1817 	 * If there's nothing to do, we done.
1818 	 * Relies on the union for multi_instance.
1819 	 */
1820 	if ((bringup && !sp->startup.single) ||
1821 	    (!bringup && !sp->teardown.single))
1822 		return 0;
1823 	/*
1824 	 * The non AP bound callbacks can fail on bringup. On teardown
1825 	 * e.g. module removal we crash for now.
1826 	 */
1827 #ifdef CONFIG_SMP
1828 	if (cpuhp_is_ap_state(state))
1829 		ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
1830 	else
1831 		ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1832 #else
1833 	ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1834 #endif
1835 	BUG_ON(ret && !bringup);
1836 	return ret;
1837 }
1838 
1839 /*
1840  * Called from __cpuhp_setup_state on a recoverable failure.
1841  *
1842  * Note: The teardown callbacks for rollback are not allowed to fail!
1843  */
cpuhp_rollback_install(int failedcpu,enum cpuhp_state state,struct hlist_node * node)1844 static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
1845 				   struct hlist_node *node)
1846 {
1847 	int cpu;
1848 
1849 	/* Roll back the already executed steps on the other cpus */
1850 	for_each_present_cpu(cpu) {
1851 		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1852 		int cpustate = st->state;
1853 
1854 		if (cpu >= failedcpu)
1855 			break;
1856 
1857 		/* Did we invoke the startup call on that cpu ? */
1858 		if (cpustate >= state)
1859 			cpuhp_issue_call(cpu, state, false, node);
1860 	}
1861 }
1862 
/*
 * Add @node as an instance of the multi instance state @state.
 *
 * The caller must hold the cpus lock. With @invoke set and a startup
 * callback installed, the callback is issued for @node on every present CPU
 * whose hotplug state is at or above @state. If that fails on some CPU, the
 * CPUs already handled are rolled back (when a teardown callback exists)
 * and @node is not added.
 *
 * Returns 0 on success, -EINVAL if @state is not a multi instance state, or
 * the error of the failing startup callback.
 */
int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
					  struct hlist_node *node,
					  bool invoke)
{
	struct cpuhp_step *step;
	int cpu;
	int ret;

	lockdep_assert_cpus_held();

	step = cpuhp_get_step(state);
	if (!step->multi_instance)
		return -EINVAL;

	mutex_lock(&cpuhp_state_mutex);

	if (invoke && step->startup.multi) {
		for_each_present_cpu(cpu) {
			int cpustate = per_cpu_ptr(&cpuhp_state, cpu)->state;

			if (cpustate < state)
				continue;

			ret = cpuhp_issue_call(cpu, state, true, node);
			if (!ret)
				continue;

			if (step->teardown.multi)
				cpuhp_rollback_install(cpu, state, node);
			goto unlock;
		}
	}

	ret = 0;
	hlist_add_head(node, &step->list);
unlock:
	mutex_unlock(&cpuhp_state_mutex);
	return ret;
}
1907 
/*
 * Same as __cpuhp_state_add_instance_cpuslocked(), but takes the cpus read
 * lock around the call itself.
 */
int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
			       bool invoke)
{
	int err;

	cpus_read_lock();
	err = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
	cpus_read_unlock();

	return err;
}
EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
1919 
1920 /**
1921  * __cpuhp_setup_state_cpuslocked - Setup the callbacks for an hotplug machine state
1922  * @state:		The state to setup
1923  * @invoke:		If true, the startup function is invoked for cpus where
1924  *			cpu state >= @state
1925  * @startup:		startup callback function
1926  * @teardown:		teardown callback function
1927  * @multi_instance:	State is set up for multiple instances which get
1928  *			added afterwards.
1929  *
1930  * The caller needs to hold cpus read locked while calling this function.
1931  * Returns:
1932  *   On success:
1933  *      Positive state number if @state is CPUHP_AP_ONLINE_DYN
1934  *      0 for all other states
1935  *   On failure: proper (negative) error code
1936  */
__cpuhp_setup_state_cpuslocked(enum cpuhp_state state,const char * name,bool invoke,int (* startup)(unsigned int cpu),int (* teardown)(unsigned int cpu),bool multi_instance)1937 int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
1938 				   const char *name, bool invoke,
1939 				   int (*startup)(unsigned int cpu),
1940 				   int (*teardown)(unsigned int cpu),
1941 				   bool multi_instance)
1942 {
1943 	int cpu, ret = 0;
1944 	bool dynstate;
1945 
1946 	lockdep_assert_cpus_held();
1947 
1948 	if (cpuhp_cb_check(state) || !name)
1949 		return -EINVAL;
1950 
1951 	mutex_lock(&cpuhp_state_mutex);
1952 
1953 	ret = cpuhp_store_callbacks(state, name, startup, teardown,
1954 				    multi_instance);
1955 
1956 	dynstate = state == CPUHP_AP_ONLINE_DYN;
1957 	if (ret > 0 && dynstate) {
1958 		state = ret;
1959 		ret = 0;
1960 	}
1961 
1962 	if (ret || !invoke || !startup)
1963 		goto out;
1964 
1965 	/*
1966 	 * Try to call the startup callback for each present cpu
1967 	 * depending on the hotplug state of the cpu.
1968 	 */
1969 	for_each_present_cpu(cpu) {
1970 		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1971 		int cpustate = st->state;
1972 
1973 		if (cpustate < state)
1974 			continue;
1975 
1976 		ret = cpuhp_issue_call(cpu, state, true, NULL);
1977 		if (ret) {
1978 			if (teardown)
1979 				cpuhp_rollback_install(cpu, state, NULL);
1980 			cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
1981 			goto out;
1982 		}
1983 	}
1984 out:
1985 	mutex_unlock(&cpuhp_state_mutex);
1986 	/*
1987 	 * If the requested state is CPUHP_AP_ONLINE_DYN, return the
1988 	 * dynamically allocated state in case of success.
1989 	 */
1990 	if (!ret && dynstate)
1991 		return state;
1992 	return ret;
1993 }
1994 EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
1995 
/*
 * Same as __cpuhp_setup_state_cpuslocked(), but takes the cpus read lock
 * around the call itself. Arguments and return value are passed through
 * unchanged.
 */
int __cpuhp_setup_state(enum cpuhp_state state,
			const char *name, bool invoke,
			int (*startup)(unsigned int cpu),
			int (*teardown)(unsigned int cpu),
			bool multi_instance)
{
	int err;

	cpus_read_lock();
	err = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
					     teardown, multi_instance);
	cpus_read_unlock();

	return err;
}
EXPORT_SYMBOL(__cpuhp_setup_state);
2011 
/*
 * Remove instance @node from the multi instance state @state.
 *
 * With @invoke set and a teardown callback installed, the callback is issued
 * for @node on every present CPU whose hotplug state is at or above @state
 * before the node is unlinked. An invalid @state is fatal (BUG).
 *
 * Returns 0 on success, -EINVAL if @state is not a multi instance state.
 */
int __cpuhp_state_remove_instance(enum cpuhp_state state,
				  struct hlist_node *node, bool invoke)
{
	struct cpuhp_step *step = cpuhp_get_step(state);
	int cpu;

	BUG_ON(cpuhp_cb_check(state));

	if (!step->multi_instance)
		return -EINVAL;

	cpus_read_lock();
	mutex_lock(&cpuhp_state_mutex);

	if (invoke && cpuhp_get_teardown_cb(state)) {
		/*
		 * Run the teardown callback on each present cpu that is far
		 * enough up. This is currently not allowed to fail!
		 */
		for_each_present_cpu(cpu) {
			int cpustate = per_cpu_ptr(&cpuhp_state, cpu)->state;

			if (cpustate >= state)
				cpuhp_issue_call(cpu, state, false, node);
		}
	}

	hlist_del(node);
	mutex_unlock(&cpuhp_state_mutex);
	cpus_read_unlock();

	return 0;
}
EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
2049 
2050 /**
2051  * __cpuhp_remove_state_cpuslocked - Remove the callbacks for an hotplug machine state
2052  * @state:	The state to remove
2053  * @invoke:	If true, the teardown function is invoked for cpus where
2054  *		cpu state >= @state
2055  *
2056  * The caller needs to hold cpus read locked while calling this function.
2057  * The teardown callback is currently not allowed to fail. Think
2058  * about module removal!
2059  */
__cpuhp_remove_state_cpuslocked(enum cpuhp_state state,bool invoke)2060 void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
2061 {
2062 	struct cpuhp_step *sp = cpuhp_get_step(state);
2063 	int cpu;
2064 
2065 	BUG_ON(cpuhp_cb_check(state));
2066 
2067 	lockdep_assert_cpus_held();
2068 
2069 	mutex_lock(&cpuhp_state_mutex);
2070 	if (sp->multi_instance) {
2071 		WARN(!hlist_empty(&sp->list),
2072 		     "Error: Removing state %d which has instances left.\n",
2073 		     state);
2074 		goto remove;
2075 	}
2076 
2077 	if (!invoke || !cpuhp_get_teardown_cb(state))
2078 		goto remove;
2079 
2080 	/*
2081 	 * Call the teardown callback for each present cpu depending
2082 	 * on the hotplug state of the cpu. This function is not
2083 	 * allowed to fail currently!
2084 	 */
2085 	for_each_present_cpu(cpu) {
2086 		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2087 		int cpustate = st->state;
2088 
2089 		if (cpustate >= state)
2090 			cpuhp_issue_call(cpu, state, false, NULL);
2091 	}
2092 remove:
2093 	cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
2094 	mutex_unlock(&cpuhp_state_mutex);
2095 }
2096 EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
2097 
/*
 * Variant of __cpuhp_remove_state_cpuslocked() for callers that do not
 * hold the cpus read lock yet; the lock is held only around that call.
 */
void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
{
	cpus_read_lock();

	__cpuhp_remove_state_cpuslocked(state, invoke);

	cpus_read_unlock();
}
EXPORT_SYMBOL(__cpuhp_remove_state);
2105 
2106 #ifdef CONFIG_HOTPLUG_SMT
cpuhp_offline_cpu_device(unsigned int cpu)2107 static void cpuhp_offline_cpu_device(unsigned int cpu)
2108 {
2109 	struct device *dev = get_cpu_device(cpu);
2110 
2111 	dev->offline = true;
2112 	/* Tell user space about the state change */
2113 	kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
2114 }
2115 
cpuhp_online_cpu_device(unsigned int cpu)2116 static void cpuhp_online_cpu_device(unsigned int cpu)
2117 {
2118 	struct device *dev = get_cpu_device(cpu);
2119 
2120 	dev->offline = false;
2121 	/* Tell user space about the state change */
2122 	kobject_uevent(&dev->kobj, KOBJ_ONLINE);
2123 }
2124 
cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)2125 int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
2126 {
2127 	int cpu, ret = 0;
2128 
2129 	cpu_maps_update_begin();
2130 	for_each_online_cpu(cpu) {
2131 		if (topology_is_primary_thread(cpu))
2132 			continue;
2133 		ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
2134 		if (ret)
2135 			break;
2136 		/*
2137 		 * As this needs to hold the cpu maps lock it's impossible
2138 		 * to call device_offline() because that ends up calling
2139 		 * cpu_down() which takes cpu maps lock. cpu maps lock
2140 		 * needs to be held as this might race against in kernel
2141 		 * abusers of the hotplug machinery (thermal management).
2142 		 *
2143 		 * So nothing would update device:offline state. That would
2144 		 * leave the sysfs entry stale and prevent onlining after
2145 		 * smt control has been changed to 'off' again. This is
2146 		 * called under the sysfs hotplug lock, so it is properly
2147 		 * serialized against the regular offline usage.
2148 		 */
2149 		cpuhp_offline_cpu_device(cpu);
2150 	}
2151 	if (!ret)
2152 		cpu_smt_control = ctrlval;
2153 	cpu_maps_update_done();
2154 	return ret;
2155 }
2156 
cpuhp_smt_enable(void)2157 int cpuhp_smt_enable(void)
2158 {
2159 	int cpu, ret = 0;
2160 
2161 	cpu_maps_update_begin();
2162 	cpu_smt_control = CPU_SMT_ENABLED;
2163 	for_each_present_cpu(cpu) {
2164 		/* Skip online CPUs and CPUs on offline nodes */
2165 		if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
2166 			continue;
2167 		ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
2168 		if (ret)
2169 			break;
2170 		/* See comment in cpuhp_smt_disable() */
2171 		cpuhp_online_cpu_device(cpu);
2172 	}
2173 	cpu_maps_update_done();
2174 	return ret;
2175 }
2176 #endif
2177 
2178 #if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
show_cpuhp_state(struct device * dev,struct device_attribute * attr,char * buf)2179 static ssize_t show_cpuhp_state(struct device *dev,
2180 				struct device_attribute *attr, char *buf)
2181 {
2182 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2183 
2184 	return sprintf(buf, "%d\n", st->state);
2185 }
2186 static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);
2187 
write_cpuhp_target(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)2188 static ssize_t write_cpuhp_target(struct device *dev,
2189 				  struct device_attribute *attr,
2190 				  const char *buf, size_t count)
2191 {
2192 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2193 	struct cpuhp_step *sp;
2194 	int target, ret;
2195 
2196 	ret = kstrtoint(buf, 10, &target);
2197 	if (ret)
2198 		return ret;
2199 
2200 #ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
2201 	if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
2202 		return -EINVAL;
2203 #else
2204 	if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
2205 		return -EINVAL;
2206 #endif
2207 
2208 	ret = lock_device_hotplug_sysfs();
2209 	if (ret)
2210 		return ret;
2211 
2212 	mutex_lock(&cpuhp_state_mutex);
2213 	sp = cpuhp_get_step(target);
2214 	ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
2215 	mutex_unlock(&cpuhp_state_mutex);
2216 	if (ret)
2217 		goto out;
2218 
2219 	if (st->state < target)
2220 		ret = cpu_up(dev->id, target);
2221 	else
2222 		ret = cpu_down(dev->id, target);
2223 out:
2224 	unlock_device_hotplug();
2225 	return ret ? ret : count;
2226 }
2227 
show_cpuhp_target(struct device * dev,struct device_attribute * attr,char * buf)2228 static ssize_t show_cpuhp_target(struct device *dev,
2229 				 struct device_attribute *attr, char *buf)
2230 {
2231 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2232 
2233 	return sprintf(buf, "%d\n", st->target);
2234 }
2235 static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
2236 
2237 
write_cpuhp_fail(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)2238 static ssize_t write_cpuhp_fail(struct device *dev,
2239 				struct device_attribute *attr,
2240 				const char *buf, size_t count)
2241 {
2242 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2243 	struct cpuhp_step *sp;
2244 	int fail, ret;
2245 
2246 	ret = kstrtoint(buf, 10, &fail);
2247 	if (ret)
2248 		return ret;
2249 
2250 	if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
2251 		return -EINVAL;
2252 
2253 	/*
2254 	 * Cannot fail STARTING/DYING callbacks.
2255 	 */
2256 	if (cpuhp_is_atomic_state(fail))
2257 		return -EINVAL;
2258 
2259 	/*
2260 	 * Cannot fail anything that doesn't have callbacks.
2261 	 */
2262 	mutex_lock(&cpuhp_state_mutex);
2263 	sp = cpuhp_get_step(fail);
2264 	if (!sp->startup.single && !sp->teardown.single)
2265 		ret = -EINVAL;
2266 	mutex_unlock(&cpuhp_state_mutex);
2267 	if (ret)
2268 		return ret;
2269 
2270 	st->fail = fail;
2271 
2272 	return count;
2273 }
2274 
show_cpuhp_fail(struct device * dev,struct device_attribute * attr,char * buf)2275 static ssize_t show_cpuhp_fail(struct device *dev,
2276 			       struct device_attribute *attr, char *buf)
2277 {
2278 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2279 
2280 	return sprintf(buf, "%d\n", st->fail);
2281 }
2282 
2283 static DEVICE_ATTR(fail, 0644, show_cpuhp_fail, write_cpuhp_fail);
2284 
2285 static struct attribute *cpuhp_cpu_attrs[] = {
2286 	&dev_attr_state.attr,
2287 	&dev_attr_target.attr,
2288 	&dev_attr_fail.attr,
2289 	NULL
2290 };
2291 
2292 static const struct attribute_group cpuhp_cpu_attr_group = {
2293 	.attrs = cpuhp_cpu_attrs,
2294 	.name = "hotplug",
2295 	NULL
2296 };
2297 
show_cpuhp_states(struct device * dev,struct device_attribute * attr,char * buf)2298 static ssize_t show_cpuhp_states(struct device *dev,
2299 				 struct device_attribute *attr, char *buf)
2300 {
2301 	ssize_t cur, res = 0;
2302 	int i;
2303 
2304 	mutex_lock(&cpuhp_state_mutex);
2305 	for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
2306 		struct cpuhp_step *sp = cpuhp_get_step(i);
2307 
2308 		if (sp->name) {
2309 			cur = sprintf(buf, "%3d: %s\n", i, sp->name);
2310 			buf += cur;
2311 			res += cur;
2312 		}
2313 	}
2314 	mutex_unlock(&cpuhp_state_mutex);
2315 	return res;
2316 }
2317 static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);
2318 
2319 static struct attribute *cpuhp_cpu_root_attrs[] = {
2320 	&dev_attr_states.attr,
2321 	NULL
2322 };
2323 
2324 static const struct attribute_group cpuhp_cpu_root_attr_group = {
2325 	.attrs = cpuhp_cpu_root_attrs,
2326 	.name = "hotplug",
2327 	NULL
2328 };
2329 
2330 #ifdef CONFIG_HOTPLUG_SMT
2331 
2332 static ssize_t
__store_smt_control(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)2333 __store_smt_control(struct device *dev, struct device_attribute *attr,
2334 		    const char *buf, size_t count)
2335 {
2336 	int ctrlval, ret;
2337 
2338 	if (sysfs_streq(buf, "on"))
2339 		ctrlval = CPU_SMT_ENABLED;
2340 	else if (sysfs_streq(buf, "off"))
2341 		ctrlval = CPU_SMT_DISABLED;
2342 	else if (sysfs_streq(buf, "forceoff"))
2343 		ctrlval = CPU_SMT_FORCE_DISABLED;
2344 	else
2345 		return -EINVAL;
2346 
2347 	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
2348 		return -EPERM;
2349 
2350 	if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
2351 		return -ENODEV;
2352 
2353 	ret = lock_device_hotplug_sysfs();
2354 	if (ret)
2355 		return ret;
2356 
2357 	if (ctrlval != cpu_smt_control) {
2358 		switch (ctrlval) {
2359 		case CPU_SMT_ENABLED:
2360 			ret = cpuhp_smt_enable();
2361 			break;
2362 		case CPU_SMT_DISABLED:
2363 		case CPU_SMT_FORCE_DISABLED:
2364 			ret = cpuhp_smt_disable(ctrlval);
2365 			break;
2366 		}
2367 	}
2368 
2369 	unlock_device_hotplug();
2370 	return ret ? ret : count;
2371 }
2372 
2373 #else /* !CONFIG_HOTPLUG_SMT */
2374 static ssize_t
__store_smt_control(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)2375 __store_smt_control(struct device *dev, struct device_attribute *attr,
2376 		    const char *buf, size_t count)
2377 {
2378 	return -ENODEV;
2379 }
2380 #endif /* CONFIG_HOTPLUG_SMT */
2381 
2382 static const char *smt_states[] = {
2383 	[CPU_SMT_ENABLED]		= "on",
2384 	[CPU_SMT_DISABLED]		= "off",
2385 	[CPU_SMT_FORCE_DISABLED]	= "forceoff",
2386 	[CPU_SMT_NOT_SUPPORTED]		= "notsupported",
2387 	[CPU_SMT_NOT_IMPLEMENTED]	= "notimplemented",
2388 };
2389 
2390 static ssize_t
show_smt_control(struct device * dev,struct device_attribute * attr,char * buf)2391 show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
2392 {
2393 	const char *state = smt_states[cpu_smt_control];
2394 
2395 	return snprintf(buf, PAGE_SIZE - 2, "%s\n", state);
2396 }
2397 
2398 static ssize_t
store_smt_control(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)2399 store_smt_control(struct device *dev, struct device_attribute *attr,
2400 		  const char *buf, size_t count)
2401 {
2402 	return __store_smt_control(dev, attr, buf, count);
2403 }
2404 static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
2405 
2406 static ssize_t
show_smt_active(struct device * dev,struct device_attribute * attr,char * buf)2407 show_smt_active(struct device *dev, struct device_attribute *attr, char *buf)
2408 {
2409 	return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active());
2410 }
2411 static DEVICE_ATTR(active, 0444, show_smt_active, NULL);
2412 
2413 static struct attribute *cpuhp_smt_attrs[] = {
2414 	&dev_attr_control.attr,
2415 	&dev_attr_active.attr,
2416 	NULL
2417 };
2418 
2419 static const struct attribute_group cpuhp_smt_attr_group = {
2420 	.attrs = cpuhp_smt_attrs,
2421 	.name = "smt",
2422 	NULL
2423 };
2424 
cpu_smt_sysfs_init(void)2425 static int __init cpu_smt_sysfs_init(void)
2426 {
2427 	return sysfs_create_group(&cpu_subsys.dev_root->kobj,
2428 				  &cpuhp_smt_attr_group);
2429 }
2430 
cpuhp_sysfs_init(void)2431 static int __init cpuhp_sysfs_init(void)
2432 {
2433 	int cpu, ret;
2434 
2435 	ret = cpu_smt_sysfs_init();
2436 	if (ret)
2437 		return ret;
2438 
2439 	ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
2440 				 &cpuhp_cpu_root_attr_group);
2441 	if (ret)
2442 		return ret;
2443 
2444 	for_each_possible_cpu(cpu) {
2445 		struct device *dev = get_cpu_device(cpu);
2446 
2447 		if (!dev)
2448 			continue;
2449 		ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
2450 		if (ret)
2451 			return ret;
2452 	}
2453 	return 0;
2454 }
2455 device_initcall(cpuhp_sysfs_init);
2456 #endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */
2457 
2458 /*
2459  * cpu_bit_bitmap[] is a special, "compressed" data structure that
2460  * represents all NR_CPUS bits binary values of 1<<nr.
2461  *
2462  * It is used by cpumask_of() to get a constant address to a CPU
2463  * mask value that has a single bit set only.
2464  */
2465 
2466 /* cpu_bit_bitmap[0] is empty - so we can back into it */
2467 #define MASK_DECLARE_1(x)	[x+1][0] = (1UL << (x))
2468 #define MASK_DECLARE_2(x)	MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
2469 #define MASK_DECLARE_4(x)	MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
2470 #define MASK_DECLARE_8(x)	MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
2471 
2472 const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
2473 
2474 	MASK_DECLARE_8(0),	MASK_DECLARE_8(8),
2475 	MASK_DECLARE_8(16),	MASK_DECLARE_8(24),
2476 #if BITS_PER_LONG > 32
2477 	MASK_DECLARE_8(32),	MASK_DECLARE_8(40),
2478 	MASK_DECLARE_8(48),	MASK_DECLARE_8(56),
2479 #endif
2480 };
2481 EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
2482 
2483 const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
2484 EXPORT_SYMBOL(cpu_all_bits);
2485 
2486 #ifdef CONFIG_INIT_ALL_POSSIBLE
2487 struct cpumask __cpu_possible_mask __read_mostly
2488 	= {CPU_BITS_ALL};
2489 #else
2490 struct cpumask __cpu_possible_mask __read_mostly;
2491 #endif
2492 EXPORT_SYMBOL(__cpu_possible_mask);
2493 
2494 struct cpumask __cpu_online_mask __read_mostly;
2495 EXPORT_SYMBOL(__cpu_online_mask);
2496 
2497 struct cpumask __cpu_present_mask __read_mostly;
2498 EXPORT_SYMBOL(__cpu_present_mask);
2499 
2500 struct cpumask __cpu_active_mask __read_mostly;
2501 EXPORT_SYMBOL(__cpu_active_mask);
2502 
2503 #ifdef CONFIG_CPU_ISOLATION_OPT
2504 struct cpumask __cpu_isolated_mask __read_mostly;
2505 EXPORT_SYMBOL(__cpu_isolated_mask);
2506 #endif
2507 
2508 atomic_t __num_online_cpus __read_mostly;
2509 EXPORT_SYMBOL(__num_online_cpus);
2510 
init_cpu_present(const struct cpumask * src)2511 void init_cpu_present(const struct cpumask *src)
2512 {
2513 	cpumask_copy(&__cpu_present_mask, src);
2514 }
2515 
init_cpu_possible(const struct cpumask * src)2516 void init_cpu_possible(const struct cpumask *src)
2517 {
2518 	cpumask_copy(&__cpu_possible_mask, src);
2519 }
2520 
init_cpu_online(const struct cpumask * src)2521 void init_cpu_online(const struct cpumask *src)
2522 {
2523 	cpumask_copy(&__cpu_online_mask, src);
2524 }
2525 
2526 #ifdef CONFIG_CPU_ISOLATION_OPT
init_cpu_isolated(const struct cpumask * src)2527 void init_cpu_isolated(const struct cpumask *src)
2528 {
2529 	cpumask_copy(&__cpu_isolated_mask, src);
2530 }
2531 #endif
2532 
/*
 * Set or clear @cpu in the online mask and keep __num_online_cpus in step:
 * the counter only changes when the bit actually flips.
 */
void set_cpu_online(unsigned int cpu, bool online)
{
	/*
	 * atomic_inc/dec() is required to handle the horrid abuse of this
	 * function by the reboot and kexec code which invoke it from
	 * IPI/NMI broadcasts when shutting down CPUs. Invocation from
	 * regular CPU hotplug is properly serialized.
	 *
	 * Note, that the fact that __num_online_cpus is of type atomic_t
	 * does not protect readers which are not serialized against
	 * concurrent hotplug operations.
	 */
	if (online) {
		/* Bit was clear before: one more cpu is online */
		if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask))
			atomic_inc(&__num_online_cpus);
	} else if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask)) {
		/* Bit was set before: one cpu less is online */
		atomic_dec(&__num_online_cpus);
	}
}
2553 
2554 /*
2555  * Activate the first processor.
2556  */
boot_cpu_init(void)2557 void __init boot_cpu_init(void)
2558 {
2559 	int cpu = smp_processor_id();
2560 
2561 	/* Mark the boot cpu "present", "online" etc for SMP and UP case */
2562 	set_cpu_online(cpu, true);
2563 	set_cpu_active(cpu, true);
2564 	set_cpu_present(cpu, true);
2565 	set_cpu_possible(cpu, true);
2566 
2567 #ifdef CONFIG_SMP
2568 	__boot_cpu_id = cpu;
2569 #endif
2570 }
2571 
2572 /*
2573  * Must be called _AFTER_ setting up the per_cpu areas
2574  */
boot_cpu_hotplug_init(void)2575 void __init boot_cpu_hotplug_init(void)
2576 {
2577 #ifdef CONFIG_SMP
2578 	cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
2579 #endif
2580 	this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
2581 }
2582 
2583 /*
2584  * These are used for a global "mitigations=" cmdline option for toggling
2585  * optional CPU mitigations.
2586  */
2587 enum cpu_mitigations {
2588 	CPU_MITIGATIONS_OFF,
2589 	CPU_MITIGATIONS_AUTO,
2590 	CPU_MITIGATIONS_AUTO_NOSMT,
2591 };
2592 
2593 static enum cpu_mitigations cpu_mitigations __ro_after_init =
2594 	CPU_MITIGATIONS_AUTO;
2595 
mitigations_parse_cmdline(char * arg)2596 static int __init mitigations_parse_cmdline(char *arg)
2597 {
2598 	if (!strcmp(arg, "off"))
2599 		cpu_mitigations = CPU_MITIGATIONS_OFF;
2600 	else if (!strcmp(arg, "auto"))
2601 		cpu_mitigations = CPU_MITIGATIONS_AUTO;
2602 	else if (!strcmp(arg, "auto,nosmt"))
2603 		cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
2604 	else
2605 		pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n",
2606 			arg);
2607 
2608 	return 0;
2609 }
2610 early_param("mitigations", mitigations_parse_cmdline);
2611 
2612 /* mitigations=off */
cpu_mitigations_off(void)2613 bool cpu_mitigations_off(void)
2614 {
2615 	return cpu_mitigations == CPU_MITIGATIONS_OFF;
2616 }
2617 EXPORT_SYMBOL_GPL(cpu_mitigations_off);
2618 
2619 /* mitigations=auto,nosmt */
cpu_mitigations_auto_nosmt(void)2620 bool cpu_mitigations_auto_nosmt(void)
2621 {
2622 	return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
2623 }
2624 EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
2625