// SPDX-License-Identifier: GPL-2.0-only
/*
 *	linux/kernel/softirq.c
 *
 *	Copyright (C) 1992 Linus Torvalds
 *
 *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/local_lock.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/ftrace.h>
#include <linux/smp.h>
#include <linux/smpboot.h>
#include <linux/tick.h>
#include <linux/irq.h>
#include <linux/wait_bit.h>
#include <linux/workqueue.h>

#include <asm/softirq_stack.h>

#define CREATE_TRACE_POINTS
#include <trace/events/irq.h>

EXPORT_TRACEPOINT_SYMBOL_GPL(irq_handler_entry);
EXPORT_TRACEPOINT_SYMBOL_GPL(irq_handler_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(softirq_entry);
EXPORT_TRACEPOINT_SYMBOL_GPL(softirq_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(tasklet_entry);
EXPORT_TRACEPOINT_SYMBOL_GPL(tasklet_exit);

/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if a softirq is serialized, only the local CPU is marked for
     execution. Hence, we get something sort of weak CPU binding.
     It is still not clear whether this results in better locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized with respect to themselves.
 */

#ifndef __ARCH_IRQ_STAT
DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
#endif

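/* Handlers for the individual softirq vectors, registered via open_softirq(). */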
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

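/* Per-CPU ksoftirqd thread, set up by spawn_ksoftirqd() via the smpboot infrastructure. */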
DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
EXPORT_PER_CPU_SYMBOL_GPL(ksoftirqd);

#ifdef CONFIG_RT_SOFTIRQ_AWARE_SCHED
/*
 * active_softirqs -- per cpu, a mask of softirqs that are being handled,
 * with the expectation that approximate answers are acceptable and therefore
 * no synchronization.
 */
DEFINE_PER_CPU(u32, active_softirqs);
static inline void set_active_softirqs(u32 pending)
{
	__this_cpu_write(active_softirqs, pending);
}
#else /* CONFIG_RT_SOFTIRQ_AWARE_SCHED */
static inline void set_active_softirqs(u32 pending) {};
#endif /* CONFIG_RT_SOFTIRQ_AWARE_SCHED */

const char * const softirq_to_name[NR_SOFTIRQS] = {
	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
	"TASKLET", "SCHED", "HRTIMER", "RCU"
};

/*
 * We cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst case 1/HZ latency
 * to the pending events, so let the scheduler balance
 * the softirq load for us.
 */
static void wakeup_softirqd(void)
{
	/* Interrupts are disabled: no need to stop preemption */
	struct task_struct *tsk = __this_cpu_read(ksoftirqd);

	if (tsk)
		wake_up_process(tsk);
}

#ifdef CONFIG_TRACE_IRQFLAGS
DEFINE_PER_CPU(int, hardirqs_enabled);
DEFINE_PER_CPU(int, hardirq_context);
EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled);
EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context);
#endif

/*
 * SOFTIRQ_OFFSET usage:
 *
 * On !RT kernels 'count' is the preempt counter, on RT kernels this applies
 * to a per CPU counter and to task::softirqs_disabled_cnt.
 *
 * - count is changed by SOFTIRQ_OFFSET on entering or leaving softirq
 *   processing.
 *
 * - count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
 *   on local_bh_disable or local_bh_enable.
 *
 * This lets us distinguish between whether we are currently processing
 * softirq and whether we just have bh disabled.
 */
#ifdef CONFIG_PREEMPT_RT

/*
 * RT accounts for BH disabled sections in task::softirqs_disabled_cnt and
 * also in per CPU softirq_ctrl::cnt. This is necessary to allow tasks in a
 * softirq disabled section to be preempted.
 *
 * The per task counter is used for softirq_count(), in_softirq() and
 * in_serving_softirq() because these counts are only valid when the task
 * holding softirq_ctrl::lock is running.
 *
 * The per CPU counter prevents pointless wakeups of ksoftirqd in case the
 * task which is in a softirq disabled section is preempted or blocks.
 */
struct softirq_ctrl {
	local_lock_t	lock;
	int		cnt;
};

static DEFINE_PER_CPU(struct softirq_ctrl, softirq_ctrl) = {
	.lock	= INIT_LOCAL_LOCK(softirq_ctrl.lock),
};

#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key bh_lock_key;
struct lockdep_map bh_lock_map = {
	.name			= "local_bh",
	.key			= &bh_lock_key,
	.wait_type_outer	= LD_WAIT_FREE,
	.wait_type_inner	= LD_WAIT_CONFIG, /* PREEMPT_RT makes BH preemptible. */
	.lock_type		= LD_LOCK_PERCPU,
};
EXPORT_SYMBOL_GPL(bh_lock_map);
#endif

/**
 * local_bh_blocked() - Check from idle whether BH processing is blocked
 *
 * Returns false if the per CPU softirq_ctrl::cnt is 0, otherwise true.
 *
 * This is invoked from the idle task to guard against false positive
 * softirq pending warnings, which would happen when the task which holds
 * softirq_ctrl::lock was the only running task on the CPU and blocks on
 * some other lock.
 */
bool local_bh_blocked(void)
{
	return __this_cpu_read(softirq_ctrl.cnt) != 0;
}

void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
	unsigned long flags;
	int newcnt;

	WARN_ON_ONCE(in_hardirq());

	lock_map_acquire_read(&bh_lock_map);

	/* First entry of a task into a BH disabled section? */
	if (!current->softirq_disable_cnt) {
		if (preemptible()) {
			local_lock(&softirq_ctrl.lock);
			/* Required to meet the RCU bottomhalf requirements. */
			rcu_read_lock();
		} else {
			DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt));
		}
	}

	/*
	 * Track the per CPU softirq disabled state. On RT this is per CPU
	 * state to allow preemption of bottom half disabled sections.
	 */
	newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt);
	/*
	 * Reflect the result in the task state to prevent recursion on the
	 * local lock and to make softirq_count() & al work.
	 */
	current->softirq_disable_cnt = newcnt;

	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) {
		raw_local_irq_save(flags);
		lockdep_softirqs_off(ip);
		raw_local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(__local_bh_disable_ip);

static void __local_bh_enable(unsigned int cnt, bool unlock)
{
	unsigned long flags;
	int newcnt;

	DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt !=
			    this_cpu_read(softirq_ctrl.cnt));

	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) {
		raw_local_irq_save(flags);
		lockdep_softirqs_on(_RET_IP_);
		raw_local_irq_restore(flags);
	}

	newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt);
	current->softirq_disable_cnt = newcnt;

	if (!newcnt && unlock) {
		rcu_read_unlock();
		local_unlock(&softirq_ctrl.lock);
	}
}

void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
	bool preempt_on = preemptible();
	unsigned long flags;
	u32 pending;
	int curcnt;

	WARN_ON_ONCE(in_hardirq());
	lockdep_assert_irqs_enabled();

	lock_map_release(&bh_lock_map);

	local_irq_save(flags);
	curcnt = __this_cpu_read(softirq_ctrl.cnt);

	/*
	 * If this is not reenabling soft interrupts, no point in trying to
	 * run pending ones.
	 */
	if (curcnt != cnt)
		goto out;

	pending = local_softirq_pending();
	if (!pending)
		goto out;

	/*
	 * If this was called from non preemptible context, wake up the
	 * softirq daemon.
	 */
	if (!preempt_on) {
		wakeup_softirqd();
		goto out;
	}

	/*
	 * Adjust softirq count to SOFTIRQ_OFFSET which makes
	 * in_serving_softirq() become true.
	 */
	cnt = SOFTIRQ_OFFSET;
	__local_bh_enable(cnt, false);
	__do_softirq();

out:
	__local_bh_enable(cnt, preempt_on);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__local_bh_enable_ip);

/*
 * Invoked from run_ksoftirqd() outside of the interrupt disabled section
 * to acquire the per CPU local lock for reentrancy protection.
 */
static inline void ksoftirqd_run_begin(void)
{
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
	local_irq_disable();
}

/* Counterpart to ksoftirqd_run_begin() */
static inline void ksoftirqd_run_end(void)
{
	/* pairs with the lock_map_acquire_read() in ksoftirqd_run_begin() */
	lock_map_release(&bh_lock_map);
	__local_bh_enable(SOFTIRQ_OFFSET, true);
	WARN_ON_ONCE(in_interrupt());
	local_irq_enable();
}

static inline void softirq_handle_begin(void) { }
static inline void softirq_handle_end(void) { }

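/*
 * Only wake ksoftirqd when no task on this CPU is inside a BH disabled
 * section; otherwise pending softirqs are handled when that section ends.
 */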
static inline bool should_wake_ksoftirqd(void)
{
	return !this_cpu_read(softirq_ctrl.cnt);
}

static inline void invoke_softirq(void)
{
	if (should_wake_ksoftirqd())
		wakeup_softirqd();
}

#define SCHED_SOFTIRQ_MASK	BIT(SCHED_SOFTIRQ)

/*
 * flush_smp_call_function_queue() can raise a soft interrupt in a function
 * call. On RT kernels this is undesired and the only known functionalities
 * are in the block layer which is disabled on RT, and in the scheduler for
 * idle load balancing. If soft interrupts get raised which haven't been
 * raised before the flush, warn if it is not a SCHED_SOFTIRQ so it can be
 * investigated.
 */
void do_softirq_post_smp_call_flush(unsigned int was_pending)
{
	unsigned int is_pending = local_softirq_pending();

	if (unlikely(was_pending != is_pending)) {
		WARN_ON_ONCE(was_pending != (is_pending & ~SCHED_SOFTIRQ_MASK));
		invoke_softirq();
	}
}

#else /* CONFIG_PREEMPT_RT */

/*
 * This one is for softirq.c-internal use, where hardirqs are disabled
 * legitimately:
 */
#ifdef CONFIG_TRACE_IRQFLAGS
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
	unsigned long flags;

	WARN_ON_ONCE(in_hardirq());

	raw_local_irq_save(flags);
	/*
	 * The preempt tracer hooks into preempt_count_add and will break
	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
	 * is set and before current->softirq_enabled is cleared.
	 * We must manually increment preempt_count here and manually
	 * call the trace_preempt_off later.
	 */
	__preempt_count_add(cnt);
	/*
	 * Were softirqs turned off above:
	 */
	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		lockdep_softirqs_off(ip);
	raw_local_irq_restore(flags);

	if (preempt_count() == cnt) {
#ifdef CONFIG_DEBUG_PREEMPT
		current->preempt_disable_ip = get_lock_parent_ip();
#endif
		trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
	}
}
EXPORT_SYMBOL(__local_bh_disable_ip);
#endif /* CONFIG_TRACE_IRQFLAGS */

static void __local_bh_enable(unsigned int cnt)
{
	lockdep_assert_irqs_disabled();

	if (preempt_count() == cnt)
		trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());

	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		lockdep_softirqs_on(_RET_IP_);

	__preempt_count_sub(cnt);
}

/*
 * Special-case - softirqs can safely be enabled by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
	WARN_ON_ONCE(in_hardirq());
	__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
}
EXPORT_SYMBOL(_local_bh_enable);

void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
	WARN_ON_ONCE(in_hardirq());
	lockdep_assert_irqs_enabled();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_disable();
#endif
	/*
	 * Are softirqs going to be turned on now:
	 */
	if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
		lockdep_softirqs_on(ip);
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	__preempt_count_sub(cnt - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending())) {
		/*
		 * Run softirqs if any are pending. And do it in their own
		 * stack, as we may be calling this deep in a task call stack
		 * already.
		 */
		do_softirq();
	}

	preempt_count_dec();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_enable();
#endif
	preempt_check_resched();
}
EXPORT_SYMBOL(__local_bh_enable_ip);

static inline void softirq_handle_begin(void)
{
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
}

static inline void softirq_handle_end(void)
{
	__local_bh_enable(SOFTIRQ_OFFSET);
	WARN_ON_ONCE(in_interrupt());
}

static inline void ksoftirqd_run_begin(void)
{
	local_irq_disable();
}

static inline void ksoftirqd_run_end(void)
{
	local_irq_enable();
}

static inline bool should_wake_ksoftirqd(void)
{
	return true;
}

static inline void invoke_softirq(void)
{
	if (!force_irqthreads() || !__this_cpu_read(ksoftirqd)) {
#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
		/*
		 * We can safely execute the softirq on the current stack if
		 * it is the irq stack, because it should be near empty
		 * at this stage.
		 */
		__do_softirq();
#else
		/*
		 * Otherwise, irq_exit() is called on the task stack, which
		 * can potentially be deep already. So call softirq in its
		 * own stack to prevent any overrun.
		 */
		do_softirq_own_stack();
#endif
	} else {
		wakeup_softirqd();
	}
}

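/*
 * Explicitly run pending softirqs from task context, on the softirq stack.
 * Bails out when already in hard or soft interrupt context.
 */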
asmlinkage __visible void do_softirq(void)
{
	__u32 pending;
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);

	pending = local_softirq_pending();

	if (pending)
		do_softirq_own_stack();

	local_irq_restore(flags);
}

#endif /* !CONFIG_PREEMPT_RT */

/*
 * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
 * but break the loop if need_resched() is set or after 2 ms.
 * MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
 * certain cases, such as stop_machine(), jiffies may cease to
 * increment and so we need the MAX_SOFTIRQ_RESTART limit as
 * well to make sure we eventually return from this method.
 *
 * These limits have been established via experimentation.
 * The two things to balance are latency against fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
#define MAX_SOFTIRQ_RESTART 10

#ifdef CONFIG_TRACE_IRQFLAGS
/*
 * When we run softirqs from irq_exit() and thus on the hardirq stack, we need
 * to keep the lockdep irq context tracking as tight as possible in order to
 * not mis-qualify lock contexts and miss possible deadlocks.
 */

static inline bool lockdep_softirq_start(void)
{
	bool in_hardirq = false;

	if (lockdep_hardirq_context()) {
		in_hardirq = true;
		lockdep_hardirq_exit();
	}

	lockdep_softirq_enter();

	return in_hardirq;
}

static inline void lockdep_softirq_end(bool in_hardirq)
{
	lockdep_softirq_exit();

	if (in_hardirq)
		lockdep_hardirq_enter();
}
#else
static inline bool lockdep_softirq_start(void) { return false; }
static inline void lockdep_softirq_end(bool in_hardirq) { }
#endif

#ifdef CONFIG_RT_SOFTIRQ_AWARE_SCHED
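/*
 * When softirqs interrupt an RT task, pull the long-running vectors
 * (LONG_SOFTIRQ_MASK) out of the pending set and leave them to ksoftirqd
 * instead of processing them inline in the RT task's context.
 */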
static __u32 softirq_deferred_for_rt(__u32 *pending)
{
	__u32 deferred = 0;

	if (rt_task(current)) {
		deferred = *pending & LONG_SOFTIRQ_MASK;
		*pending &= ~LONG_SOFTIRQ_MASK;
	}
	return deferred;
}
#else
#define softirq_deferred_for_rt(x) (0)
#endif

static void handle_softirqs(bool ksirqd)
{
	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
	unsigned long old_flags = current->flags;
	int max_restart = MAX_SOFTIRQ_RESTART;
	struct softirq_action *h;
	bool in_hardirq;
	__u32 deferred;
	__u32 pending;
	int softirq_bit;

	/*
	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
	 * softirq. A softirq handler, such as network RX, might set PF_MEMALLOC
	 * again if the socket is related to swapping.
	 */
	current->flags &= ~PF_MEMALLOC;

	pending = local_softirq_pending();
	deferred = softirq_deferred_for_rt(&pending);

	softirq_handle_begin();

	in_hardirq = lockdep_softirq_start();
	account_softirq_enter(current);

restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(deferred);
	set_active_softirqs(pending);

	local_irq_enable();

	h = softirq_vec;

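	/*
	 * Walk the pending bits lowest first: ffs() returns the 1-based
	 * index of the lowest set bit, so advance the vector pointer by
	 * that amount and shift the handled bits out of @pending.
	 */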
	while ((softirq_bit = ffs(pending))) {
		unsigned int vec_nr;
		int prev_count;

		h += softirq_bit - 1;

		vec_nr = h - softirq_vec;
		prev_count = preempt_count();

		kstat_incr_softirqs_this_cpu(vec_nr);

		trace_softirq_entry(vec_nr);
		h->action();
		trace_softirq_exit(vec_nr);
		if (unlikely(prev_count != preempt_count())) {
			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
			       vec_nr, softirq_to_name[vec_nr], h->action,
			       prev_count, preempt_count());
			preempt_count_set(prev_count);
		}
		h++;
		pending >>= softirq_bit;
	}

	set_active_softirqs(0);
	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && ksirqd)
		rcu_softirq_qs();

	local_irq_disable();

	pending = local_softirq_pending();
	deferred = softirq_deferred_for_rt(&pending);

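	/*
	 * More softirqs were raised while they were enabled: go around again
	 * unless the time budget is spent, a reschedule is due or the restart
	 * limit is hit. Anything left over is handed to ksoftirqd below.
	 */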
	if (pending) {
		if (time_before(jiffies, end) && !need_resched() &&
		    --max_restart)
			goto restart;
	}

	if (pending | deferred)
		wakeup_softirqd();

	account_softirq_exit(current);
	lockdep_softirq_end(in_hardirq);
	softirq_handle_end();
	current_restore_flags(old_flags, PF_MEMALLOC);
}

asmlinkage __visible void __softirq_entry __do_softirq(void)
{
	handle_softirqs(false);
}

/**
 * irq_enter_rcu - Enter an interrupt context with RCU watching
 */
void irq_enter_rcu(void)
{
	__irq_enter_raw();

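	/*
	 * An interrupt on an idle or nohz_full CPU may need to catch up
	 * jiffies and timekeeping: let the tick code handle that.
	 */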
	if (tick_nohz_full_cpu(smp_processor_id()) ||
	    (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET)))
		tick_irq_enter();

	account_hardirq_enter(current);
}

/**
 * irq_enter - Enter an interrupt context including RCU update
 */
void irq_enter(void)
{
	ct_irq_enter();
	irq_enter_rcu();
}

static inline void tick_irq_exit(void)
{
#ifdef CONFIG_NO_HZ_COMMON
	int cpu = smp_processor_id();

	/* Make sure that timer wheel updates are propagated */
	if ((sched_core_idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
		if (!in_hardirq())
			tick_nohz_irq_exit();
	}
#endif
}

static inline void __irq_exit_rcu(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
	local_irq_disable();
#else
	lockdep_assert_irqs_disabled();
#endif
	account_hardirq_exit(current);
	preempt_count_sub(HARDIRQ_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

	tick_irq_exit();
}

/**
 * irq_exit_rcu() - Exit an interrupt context without updating RCU
 *
 * Also processes softirqs if needed and possible.
 */
void irq_exit_rcu(void)
{
	__irq_exit_rcu();
	 /* must be last! */
	lockdep_hardirq_exit();
}

/**
 * irq_exit - Exit an interrupt context, update RCU and lockdep
 *
 * Also processes softirqs if needed and possible.
 */
void irq_exit(void)
{
	__irq_exit_rcu();
	ct_irq_exit();
	 /* must be last! */
	lockdep_hardirq_exit();
}

/*
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt() && should_wake_ksoftirqd())
		wakeup_softirqd();
}

void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(raise_softirq);

void __raise_softirq_irqoff(unsigned int nr)
{
	lockdep_assert_irqs_disabled();
	trace_softirq_raise(nr);
	or_softirq_pending(1UL << nr);
}

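/* Register the handler that is invoked when softirq vector @nr is raised. */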
void open_softirq(int nr, void (*action)(void))
{
	softirq_vec[nr].action = action;
}

/*
 * Tasklets
 */
struct tasklet_head {
	struct tasklet_struct *head;
	struct tasklet_struct **tail;
};

static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);

static void __tasklet_schedule_common(struct tasklet_struct *t,
				      struct tasklet_head __percpu *headp,
				      unsigned int softirq_nr)
{
	struct tasklet_head *head;
	unsigned long flags;

	local_irq_save(flags);
	head = this_cpu_ptr(headp);
	t->next = NULL;
	*head->tail = t;
	head->tail = &(t->next);
	raise_softirq_irqoff(softirq_nr);
	local_irq_restore(flags);
}

void __tasklet_schedule(struct tasklet_struct *t)
{
	__tasklet_schedule_common(t, &tasklet_vec,
				  TASKLET_SOFTIRQ);
}
EXPORT_SYMBOL(__tasklet_schedule);

void __tasklet_hi_schedule(struct tasklet_struct *t)
{
	__tasklet_schedule_common(t, &tasklet_hi_vec,
				  HI_SOFTIRQ);
}
EXPORT_SYMBOL(__tasklet_hi_schedule);

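/*
 * Clear the SCHED bit and wake anyone waiting on it (see tasklet_kill()).
 * Returns false and warns if the bit was not set, i.e. the tasklet was on
 * the list without being scheduled.
 */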
static bool tasklet_clear_sched(struct tasklet_struct *t)
{
	if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) {
		wake_up_var(&t->state);
		return true;
	}

	WARN_ONCE(1, "tasklet SCHED state not set: %s %pS\n",
		  t->use_callback ? "callback" : "func",
		  t->use_callback ? (void *)t->callback : (void *)t->func);

	return false;
}

static void tasklet_action_common(struct tasklet_head *tl_head,
				  unsigned int softirq_nr)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = tl_head->head;
	tl_head->head = NULL;
	tl_head->tail = &tl_head->head;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (tasklet_clear_sched(t)) {
					if (t->use_callback) {
						trace_tasklet_entry(t, t->callback);
						t->callback(t);
						trace_tasklet_exit(t, t->callback);
					} else {
						trace_tasklet_entry(t, t->func);
						t->func(t->data);
						trace_tasklet_exit(t, t->func);
					}
				}
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

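		/*
		 * The tasklet is either disabled (count != 0) or still
		 * running on another CPU: requeue it on this CPU's list and
		 * re-raise the softirq so it is retried later.
		 */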
		local_irq_disable();
		t->next = NULL;
		*tl_head->tail = t;
		tl_head->tail = &t->next;
		__raise_softirq_irqoff(softirq_nr);
		local_irq_enable();
	}
}

static __latent_entropy void tasklet_action(void)
{
	workqueue_softirq_action(false);
	tasklet_action_common(this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
}

static __latent_entropy void tasklet_hi_action(void)
{
	workqueue_softirq_action(true);
	tasklet_action_common(this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
}

void tasklet_setup(struct tasklet_struct *t,
		   void (*callback)(struct tasklet_struct *))
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->callback = callback;
	t->use_callback = true;
	t->data = 0;
}
EXPORT_SYMBOL(tasklet_setup);

void tasklet_init(struct tasklet_struct *t,
		  void (*func)(unsigned long), unsigned long data)
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->func = func;
	t->use_callback = false;
	t->data = data;
}
EXPORT_SYMBOL(tasklet_init);

#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
/*
 * Do not use in new code. Waiting for tasklets from atomic contexts is
 * error prone and should be avoided.
 */
void tasklet_unlock_spin_wait(struct tasklet_struct *t)
{
	while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
		if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
			/*
			 * Prevent a live lock when the current task has
			 * preempted soft interrupt processing or prevents
			 * ksoftirqd from running. If the tasklet runs on a
			 * different CPU, this has no effect other than doing
			 * the BH disable/enable dance for nothing.
			 */
			local_bh_disable();
			local_bh_enable();
		} else {
			cpu_relax();
		}
	}
}
EXPORT_SYMBOL(tasklet_unlock_spin_wait);
#endif

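/*
 * Block until the tasklet is neither scheduled nor running, and leave it in
 * an unscheduled state so it can be freed safely afterwards.
 */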
void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		pr_notice("Attempt to kill tasklet from interrupt\n");

	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
		wait_var_event(&t->state, !test_bit(TASKLET_STATE_SCHED, &t->state));

	tasklet_unlock_wait(t);
	tasklet_clear_sched(t);
}
EXPORT_SYMBOL(tasklet_kill);

#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
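/*
 * Drop TASKLET_STATE_RUN and wake up any tasklet_unlock_wait() waiters.
 * The barriers order the handler's memory accesses before the bit is
 * cleared and the wakeup is issued.
 */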
void tasklet_unlock(struct tasklet_struct *t)
{
	smp_mb__before_atomic();
	clear_bit(TASKLET_STATE_RUN, &t->state);
	smp_mb__after_atomic();
	wake_up_var(&t->state);
}
EXPORT_SYMBOL_GPL(tasklet_unlock);

void tasklet_unlock_wait(struct tasklet_struct *t)
{
	wait_var_event(&t->state, !test_bit(TASKLET_STATE_RUN, &t->state));
}
EXPORT_SYMBOL_GPL(tasklet_unlock_wait);
#endif

void __init softirq_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		per_cpu(tasklet_vec, cpu).tail =
			&per_cpu(tasklet_vec, cpu).head;
		per_cpu(tasklet_hi_vec, cpu).tail =
			&per_cpu(tasklet_hi_vec, cpu).head;
	}

	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}

static int ksoftirqd_should_run(unsigned int cpu)
{
	return local_softirq_pending();
}

static void run_ksoftirqd(unsigned int cpu)
{
	ksoftirqd_run_begin();
	if (local_softirq_pending()) {
		/*
		 * We can safely run softirqs on the inline stack, as we are
		 * not deep in the task stack here.
		 */
		handle_softirqs(true);
		ksoftirqd_run_end();
		cond_resched();
		return;
	}
	ksoftirqd_run_end();
}

#ifdef CONFIG_HOTPLUG_CPU
static int takeover_tasklets(unsigned int cpu)
{
	workqueue_softirq_dead(cpu);

	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
		*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
		__this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
		per_cpu(tasklet_vec, cpu).head = NULL;
		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
	}
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
		*__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
		__this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
		per_cpu(tasklet_hi_vec, cpu).head = NULL;
		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
	}
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
	return 0;
}
#else
#define takeover_tasklets	NULL
#endif /* CONFIG_HOTPLUG_CPU */

static struct smp_hotplug_thread softirq_threads = {
	.store			= &ksoftirqd,
	.thread_should_run	= ksoftirqd_should_run,
	.thread_fn		= run_ksoftirqd,
	.thread_comm		= "ksoftirqd/%u",
};

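/*
 * Register the CPU hotplug callback that adopts a dead CPU's tasklets and
 * create the per-CPU ksoftirqd threads.
 */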
static __init int spawn_ksoftirqd(void)
{
	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
				  takeover_tasklets);
	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));

	return 0;
}
early_initcall(spawn_ksoftirqd);

/*
 * [ These __weak aliases are kept in a separate compilation unit, so that
 *   GCC does not inline them incorrectly. ]
 */

int __init __weak early_irq_init(void)
{
	return 0;
}

int __init __weak arch_probe_nr_irqs(void)
{
	return NR_IRQS_LEGACY;
}

int __init __weak arch_early_irq_init(void)
{
	return 0;
}

unsigned int __weak arch_dynirq_lower_bound(unsigned int from)
{
	return from;
}