• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *	linux/kernel/softirq.c
4  *
5  *	Copyright (C) 1992 Linus Torvalds
6  *
7  *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
8  */
9 
10 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11 
12 #include <linux/export.h>
13 #include <linux/kernel_stat.h>
14 #include <linux/interrupt.h>
15 #include <linux/init.h>
16 #include <linux/local_lock.h>
17 #include <linux/mm.h>
18 #include <linux/notifier.h>
19 #include <linux/percpu.h>
20 #include <linux/cpu.h>
21 #include <linux/freezer.h>
22 #include <linux/kthread.h>
23 #include <linux/rcupdate.h>
24 #include <linux/ftrace.h>
25 #include <linux/smp.h>
26 #include <linux/smpboot.h>
27 #include <linux/tick.h>
28 #include <linux/irq.h>
29 #include <linux/wait_bit.h>
30 
31 #include <asm/softirq_stack.h>
32 
33 #define CREATE_TRACE_POINTS
34 #include <trace/events/irq.h>
35 
/* GPL-only exports of the irq handler, softirq and tasklet tracepoints. */
EXPORT_TRACEPOINT_SYMBOL_GPL(irq_handler_entry);
EXPORT_TRACEPOINT_SYMBOL_GPL(irq_handler_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(softirq_entry);
EXPORT_TRACEPOINT_SYMBOL_GPL(softirq_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(tasklet_entry);
EXPORT_TRACEPOINT_SYMBOL_GPL(tasklet_exit);
42 
43 /*
44    - No shared variables, all the data are CPU local.
45    - If a softirq needs serialization, let it serialize itself
46      by its own spinlocks.
   - Even if a softirq is serialized, only the local CPU is marked for
     execution. Hence, we get a sort of weak CPU binding, though it is
     still not clear whether this results in better locality or not.
51 
52    Examples:
53    - NET RX softirq. It is multithreaded and does not require
54      any global serialization.
55    - NET TX softirq. It kicks software netdevice queues, hence
56      it is logically serialized per device, but this serialization
57      is invisible to common code.
58    - Tasklets: serialized wrt itself.
59  */
60 
#ifndef __ARCH_IRQ_STAT
/*
 * Per-CPU irq_cpustat_t instance. Not defined here when __ARCH_IRQ_STAT is
 * set (presumably the architecture then provides irq_stat itself).
 */
DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
#endif

/* One handler slot per softirq number; slots are filled in by open_softirq(). */
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

/* Per-CPU pointer to the ksoftirqd task (used as softirq_threads.store). */
DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
EXPORT_PER_CPU_SYMBOL_GPL(ksoftirqd);
70 
#ifdef CONFIG_RT_SOFTIRQ_AWARE_SCHED
/*
 * active_softirqs -- per cpu, a mask of softirqs that are being handled,
 * with the expectation that approximate answers are acceptable and therefore
 * no synchronization.
 */
DEFINE_PER_CPU(u32, active_softirqs);

/* Publish @pending as the mask of softirqs this CPU is currently handling. */
static inline void set_active_softirqs(u32 pending)
{
	__this_cpu_write(active_softirqs, pending);
}
#else /* CONFIG_RT_SOFTIRQ_AWARE_SCHED */
/*
 * Without CONFIG_RT_SOFTIRQ_AWARE_SCHED nothing is tracked, so this is an
 * empty stub. (No trailing ';' after the body: that would be a stray empty
 * declaration at file scope.)
 */
static inline void set_active_softirqs(u32 pending) { }
#endif /* CONFIG_RT_SOFTIRQ_AWARE_SCHED */
85 
86 const char * const softirq_to_name[NR_SOFTIRQS] = {
87 	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
88 	"TASKLET", "SCHED", "HRTIMER", "RCU"
89 };
90 
91 /*
92  * we cannot loop indefinitely here to avoid userspace starvation,
93  * but we also don't want to introduce a worst case 1/HZ latency
94  * to the pending events, so lets the scheduler to balance
95  * the softirq load for us.
96  */
wakeup_softirqd(void)97 static void wakeup_softirqd(void)
98 {
99 	/* Interrupts are disabled: no need to stop preemption */
100 	struct task_struct *tsk = __this_cpu_read(ksoftirqd);
101 
102 	if (tsk)
103 		wake_up_process(tsk);
104 }
105 
106 #ifndef CONFIG_RT_SOFTIRQ_AWARE_SCHED
107 /*
108  * If ksoftirqd is scheduled, we do not want to process pending softirqs
109  * right now. Let ksoftirqd handle this at its own rate, to get fairness,
110  * unless we're doing some of the synchronous softirqs.
111  */
112 #define SOFTIRQ_NOW_MASK ((1 << HI_SOFTIRQ) | (1 << TASKLET_SOFTIRQ))
ksoftirqd_running(unsigned long pending)113 static bool ksoftirqd_running(unsigned long pending)
114 {
115 	struct task_struct *tsk = __this_cpu_read(ksoftirqd);
116 
117 	if (pending & SOFTIRQ_NOW_MASK)
118 		return false;
119 	return tsk && task_is_running(tsk) && !__kthread_should_park(tsk);
120 }
121 #else
122 #define ksoftirqd_running(pending) (false)
123 #endif /* CONFIG_RT_SOFTIRQ_AWARE_SCHED */
124 
#ifdef CONFIG_TRACE_IRQFLAGS
/* Per-CPU state that is only defined when CONFIG_TRACE_IRQFLAGS is enabled. */
DEFINE_PER_CPU(int, hardirqs_enabled);
DEFINE_PER_CPU(int, hardirq_context);
EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled);
EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context);
#endif
131 
132 /*
133  * SOFTIRQ_OFFSET usage:
134  *
135  * On !RT kernels 'count' is the preempt counter, on RT kernels this applies
136  * to a per CPU counter and to task::softirqs_disabled_cnt.
137  *
138  * - count is changed by SOFTIRQ_OFFSET on entering or leaving softirq
139  *   processing.
140  *
141  * - count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
142  *   on local_bh_disable or local_bh_enable.
143  *
144  * This lets us distinguish between whether we are currently processing
145  * softirq and whether we just have bh disabled.
146  */
147 #ifdef CONFIG_PREEMPT_RT
148 
149 /*
150  * RT accounts for BH disabled sections in task::softirqs_disabled_cnt and
151  * also in per CPU softirq_ctrl::cnt. This is necessary to allow tasks in a
152  * softirq disabled section to be preempted.
153  *
154  * The per task counter is used for softirq_count(), in_softirq() and
 * in_serving_softirq() because these counts are only valid when the task
156  * holding softirq_ctrl::lock is running.
157  *
158  * The per CPU counter prevents pointless wakeups of ksoftirqd in case that
159  * the task which is in a softirq disabled section is preempted or blocks.
160  */
struct softirq_ctrl {
	/* Taken when a preemptible task first enters a BH disabled section. */
	local_lock_t	lock;
	/* Per CPU BH disable count, updated by the bh disable/enable paths. */
	int		cnt;
};
165 
/* Per-CPU instance; only the lock needs an explicit initializer. */
static DEFINE_PER_CPU(struct softirq_ctrl, softirq_ctrl) = {
	.lock	= INIT_LOCAL_LOCK(softirq_ctrl.lock),
};
169 
170 /**
171  * local_bh_blocked() - Check for idle whether BH processing is blocked
172  *
173  * Returns false if the per CPU softirq::cnt is 0 otherwise true.
174  *
175  * This is invoked from the idle task to guard against false positive
176  * softirq pending warnings, which would happen when the task which holds
177  * softirq_ctrl::lock was the only running task on the CPU and blocks on
178  * some other lock.
179  */
local_bh_blocked(void)180 bool local_bh_blocked(void)
181 {
182 	return __this_cpu_read(softirq_ctrl.cnt) != 0;
183 }
184 
/*
 * PREEMPT_RT variant of __local_bh_disable_ip().
 *
 * @ip:  caller address handed to lockdep_softirqs_off()
 * @cnt: amount added to the per CPU softirq_ctrl::cnt
 *
 * The updated per CPU count is mirrored into current->softirq_disable_cnt.
 */
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
	unsigned long flags;
	int newcnt;

	WARN_ON_ONCE(in_hardirq());

	/* First entry of a task into a BH disabled section? */
	if (!current->softirq_disable_cnt) {
		if (preemptible()) {
			local_lock(&softirq_ctrl.lock);
			/* Required to meet the RCU bottomhalf requirements. */
			rcu_read_lock();
		} else {
			/* No lock is taken here; a non-zero per CPU count is unexpected. */
			DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt));
		}
	}

	/*
	 * Track the per CPU softirq disabled state. On RT this is per CPU
	 * state to allow preemption of bottom half disabled sections.
	 */
	newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt);
	/*
	 * Reflect the result in the task state to prevent recursion on the
	 * local lock and to make softirq_count() & al work.
	 */
	current->softirq_disable_cnt = newcnt;

	/* Tell lockdep only when the count went from zero to @cnt. */
	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) {
		raw_local_irq_save(flags);
		lockdep_softirqs_off(ip);
		raw_local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(__local_bh_disable_ip);
221 
/*
 * PREEMPT_RT helper: subtract @cnt from the per CPU softirq_ctrl::cnt and
 * mirror the result into current->softirq_disable_cnt.
 *
 * @cnt:    amount to subtract
 * @unlock: when true and the count reaches zero, leave the RCU read side
 *          section and release softirq_ctrl.lock
 */
static void __local_bh_enable(unsigned int cnt, bool unlock)
{
	unsigned long flags;
	int newcnt;

	/* The per task and per CPU counts are expected to agree here. */
	DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt !=
			    this_cpu_read(softirq_ctrl.cnt));

	/* Tell lockdep when this subtraction will bring the count to zero. */
	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) {
		raw_local_irq_save(flags);
		lockdep_softirqs_on(_RET_IP_);
		raw_local_irq_restore(flags);
	}

	newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt);
	current->softirq_disable_cnt = newcnt;

	/* Undo rcu_read_lock()/local_lock() in the reverse order of acquisition. */
	if (!newcnt && unlock) {
		rcu_read_unlock();
		local_unlock(&softirq_ctrl.lock);
	}
}
244 
/*
 * PREEMPT_RT variant of __local_bh_enable_ip().
 *
 * @ip:  caller address (not used in this variant's body)
 * @cnt: amount to remove from the BH disable count
 *
 * When this call brings the count back to zero and softirqs are pending,
 * they are either run right here (preemptible caller) or handed over to
 * ksoftirqd (non preemptible caller).
 */
void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
	/* Sampled before interrupts are disabled below. */
	bool preempt_on = preemptible();
	unsigned long flags;
	u32 pending;
	int curcnt;

	WARN_ON_ONCE(in_hardirq());
	lockdep_assert_irqs_enabled();

	local_irq_save(flags);
	curcnt = __this_cpu_read(softirq_ctrl.cnt);

	/*
	 * If this is not reenabling soft interrupts, no point in trying to
	 * run pending ones.
	 */
	if (curcnt != cnt)
		goto out;

	pending = local_softirq_pending();
	if (!pending || ksoftirqd_running(pending))
		goto out;

	/*
	 * If this was called from non preemptible context, wake up the
	 * softirq daemon.
	 */
	if (!preempt_on) {
		wakeup_softirqd();
		goto out;
	}

	/*
	 * Adjust softirq count to SOFTIRQ_OFFSET which makes
	 * in_serving_softirq() become true.
	 */
	cnt = SOFTIRQ_OFFSET;
	__local_bh_enable(cnt, false);
	__do_softirq();

out:
	/* Drop the remaining @cnt; the lock is released only for preemptible callers. */
	__local_bh_enable(cnt, preempt_on);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__local_bh_enable_ip);
291 
/*
 * Invoked from run_ksoftirqd() outside of the interrupt disabled section
 * to acquire the per CPU local lock for reentrancy protection.
 *
 * BH is disabled by SOFTIRQ_OFFSET first, then interrupts are disabled.
 */
static inline void ksoftirqd_run_begin(void)
{
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
	local_irq_disable();
}
301 
/*
 * Counterpart to ksoftirqd_run_begin(): drop the SOFTIRQ_OFFSET count
 * (unlocking when it reaches zero), then re-enable interrupts.
 */
static inline void ksoftirqd_run_end(void)
{
	__local_bh_enable(SOFTIRQ_OFFSET, true);
	/* After the count is dropped we must no longer be in interrupt context. */
	WARN_ON_ONCE(in_interrupt());
	local_irq_enable();
}
309 
/*
 * Empty on PREEMPT_RT: the callers of __do_softirq() in this configuration
 * adjust the softirq count themselves before invoking it.
 */
static inline void softirq_handle_begin(void) { }
static inline void softirq_handle_end(void) { }
312 
should_wake_ksoftirqd(void)313 static inline bool should_wake_ksoftirqd(void)
314 {
315 	return !this_cpu_read(softirq_ctrl.cnt);
316 }
317 
/* RT variant: never runs softirqs inline, at most wakes ksoftirqd. */
static inline void invoke_softirq(void)
{
	if (!should_wake_ksoftirqd())
		return;

	wakeup_softirqd();
}
323 
/*
 * flush_smp_call_function_queue() can raise a soft interrupt in a function
 * call. On RT kernels this is undesired; the only known user, in the block
 * layer, is disabled on RT. Should the pending mask differ from what it was
 * before the flush, warn so the cause can be investigated, and make sure the
 * newly raised softirqs get handled.
 */
void do_softirq_post_smp_call_flush(unsigned int was_pending)
{
	if (!WARN_ON_ONCE(was_pending != local_softirq_pending()))
		return;

	invoke_softirq();
}
336 
337 #else /* CONFIG_PREEMPT_RT */
338 
339 /*
340  * This one is for softirq.c-internal use, where hardirqs are disabled
341  * legitimately:
342  */
343 #ifdef CONFIG_TRACE_IRQFLAGS
/*
 * !PREEMPT_RT, CONFIG_TRACE_IRQFLAGS variant of __local_bh_disable_ip().
 *
 * @ip:  caller address handed to lockdep_softirqs_off()
 * @cnt: amount added to the preempt counter
 */
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
	unsigned long flags;

	WARN_ON_ONCE(in_hardirq());

	raw_local_irq_save(flags);
	/*
	 * The preempt tracer hooks into preempt_count_add and will break
	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
	 * is set and before current->softirq_enabled is cleared.
	 * We must manually increment preempt_count here and manually
	 * call the trace_preempt_off later.
	 */
	__preempt_count_add(cnt);
	/*
	 * Were softirqs turned off above:
	 */
	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		lockdep_softirqs_off(ip);
	raw_local_irq_restore(flags);

	/* The whole preempt count equals @cnt: preemption was just turned off. */
	if (preempt_count() == cnt) {
#ifdef CONFIG_DEBUG_PREEMPT
		current->preempt_disable_ip = get_lock_parent_ip();
#endif
		trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
	}
}
EXPORT_SYMBOL(__local_bh_disable_ip);
374 #endif /* CONFIG_TRACE_IRQFLAGS */
375 
/*
 * !PREEMPT_RT helper: remove @cnt from the preempt counter. Must be called
 * with interrupts disabled. The tracer and lockdep are notified before the
 * counter is actually decremented.
 */
static void __local_bh_enable(unsigned int cnt)
{
	lockdep_assert_irqs_disabled();

	/* The whole preempt count is @cnt: preemption is about to be turned on. */
	if (preempt_count() == cnt)
		trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());

	/* The softirq part of the count is @cnt: softirqs are about to be turned on. */
	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		lockdep_softirqs_on(_RET_IP_);

	__preempt_count_sub(cnt);
}
388 
/*
 * Special-case - softirqs can safely be enabled by __do_softirq(),
 * without processing still-pending softirqs:
 *
 * Drops SOFTIRQ_DISABLE_OFFSET from the preempt counter and returns;
 * pending softirqs are not run from here.
 */
void _local_bh_enable(void)
{
	WARN_ON_ONCE(in_hardirq());
	__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
}
EXPORT_SYMBOL(_local_bh_enable);
399 
/*
 * !PREEMPT_RT variant of __local_bh_enable_ip().
 *
 * @ip:  caller address handed to lockdep_softirqs_on()
 * @cnt: amount to remove from the preempt counter
 *
 * Pending softirqs are run via do_softirq() when not in interrupt context.
 */
void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
	WARN_ON_ONCE(in_hardirq());
	lockdep_assert_irqs_enabled();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_disable();
#endif
	/*
	 * Are softirqs going to be turned on now:
	 */
	if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
		lockdep_softirqs_on(ip);
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	__preempt_count_sub(cnt - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending())) {
		/*
		 * Run softirq if any pending. And do it in its own stack
		 * as we may be calling this deep in a task call stack already.
		 */
		do_softirq();
	}

	/* Drop the one count that was kept back by the "cnt - 1" above. */
	preempt_count_dec();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_enable();
#endif
	preempt_check_resched();
}
EXPORT_SYMBOL(__local_bh_enable_ip);
433 
/* !RT: mark softirq processing by adding SOFTIRQ_OFFSET to the count. */
static inline void softirq_handle_begin(void)
{
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
}
438 
/* !RT: counterpart to softirq_handle_begin(), removes SOFTIRQ_OFFSET again. */
static inline void softirq_handle_end(void)
{
	__local_bh_enable(SOFTIRQ_OFFSET);
	/* With the offset removed we must no longer be in interrupt context. */
	WARN_ON_ONCE(in_interrupt());
}
444 
/* !RT: ksoftirqd only needs interrupts disabled before checking for work. */
static inline void ksoftirqd_run_begin(void)
{
	local_irq_disable();
}
449 
/* !RT: counterpart to ksoftirqd_run_begin(), re-enables interrupts. */
static inline void ksoftirqd_run_end(void)
{
	local_irq_enable();
}
454 
/* !RT: there is no per CPU BH disable count to consult, always allow the wakeup. */
static inline bool should_wake_ksoftirqd(void)
{
	return true;
}
459 
invoke_softirq(void)460 static inline void invoke_softirq(void)
461 {
462 	if (ksoftirqd_running(local_softirq_pending()))
463 		return;
464 
465 	if (!force_irqthreads() || !__this_cpu_read(ksoftirqd)) {
466 #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
467 		/*
468 		 * We can safely execute softirq on the current stack if
469 		 * it is the irq stack, because it should be near empty
470 		 * at this stage.
471 		 */
472 		__do_softirq();
473 #else
474 		/*
475 		 * Otherwise, irq_exit() is called on the task stack that can
476 		 * be potentially deep already. So call softirq in its own stack
477 		 * to prevent from any overrun.
478 		 */
479 		do_softirq_own_stack();
480 #endif
481 	} else {
482 		wakeup_softirqd();
483 	}
484 }
485 
do_softirq(void)486 asmlinkage __visible void do_softirq(void)
487 {
488 	__u32 pending;
489 	unsigned long flags;
490 
491 	if (in_interrupt())
492 		return;
493 
494 	local_irq_save(flags);
495 
496 	pending = local_softirq_pending();
497 
498 	if (pending && !ksoftirqd_running(pending))
499 		do_softirq_own_stack();
500 
501 	local_irq_restore(flags);
502 }
503 
504 #endif /* !CONFIG_PREEMPT_RT */
505 
/*
 * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
 * but break the loop if need_resched() is set or after 2 ms.
 * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
 * certain cases, such as stop_machine(), jiffies may cease to
 * increment and so we need the MAX_SOFTIRQ_RESTART limit as
 * well to make sure we eventually return from this method.
 *
 * These limits have been established via experimentation.
 * The two things to balance are latency and fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
#define MAX_SOFTIRQ_RESTART 10
521 
522 #ifdef CONFIG_TRACE_IRQFLAGS
523 /*
 * When we run softirqs from irq_exit() and thus on the hardirq stack we need
 * to keep the lockdep irq context tracking as tight as possible in order to
 * not misqualify lock contexts and miss possible deadlocks.
527  */
528 
lockdep_softirq_start(void)529 static inline bool lockdep_softirq_start(void)
530 {
531 	bool in_hardirq = false;
532 
533 	if (lockdep_hardirq_context()) {
534 		in_hardirq = true;
535 		lockdep_hardirq_exit();
536 	}
537 
538 	lockdep_softirq_enter();
539 
540 	return in_hardirq;
541 }
542 
/*
 * Leave the lockdep softirq context and, when @in_hardirq is true (the value
 * returned by lockdep_softirq_start()), re-enter the hardirq context.
 */
static inline void lockdep_softirq_end(bool in_hardirq)
{
	lockdep_softirq_exit();

	if (in_hardirq)
		lockdep_hardirq_enter();
}
550 #else
/* Without CONFIG_TRACE_IRQFLAGS there is no lockdep context to switch. */
static inline bool lockdep_softirq_start(void) { return false; }
static inline void lockdep_softirq_end(bool in_hardirq) { }
553 #endif
554 
555 #ifdef CONFIG_RT_SOFTIRQ_AWARE_SCHED
/*
 * When the current task is an RT task, strip the LONG_SOFTIRQ_MASK bits out
 * of *@pending and return them as the deferred set. For any other task
 * *@pending is left untouched and 0 is returned.
 */
static __u32 softirq_deferred_for_rt(__u32 *pending)
{
	__u32 held_back;

	if (!rt_task(current))
		return 0;

	held_back = *pending & LONG_SOFTIRQ_MASK;
	*pending &= ~LONG_SOFTIRQ_MASK;

	return held_back;
}
566 #else
567 #define softirq_deferred_for_rt(x) (0)
568 #endif
569 
/*
 * Run the handlers of all softirqs pending on this CPU.
 *
 * Interrupts are enabled while the handlers run and disabled again before
 * the pending mask is re-read. Processing restarts while new softirqs are
 * pending, bounded by MAX_SOFTIRQ_TIME, need_resched() and
 * MAX_SOFTIRQ_RESTART; whatever is left over (including softirqs deferred
 * by softirq_deferred_for_rt()) is handed to ksoftirqd.
 */
asmlinkage __visible void __softirq_entry __do_softirq(void)
{
	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
	unsigned long old_flags = current->flags;
	int max_restart = MAX_SOFTIRQ_RESTART;
	struct softirq_action *h;
	bool in_hardirq;
	__u32 deferred;
	__u32 pending;
	int softirq_bit;

	/*
	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
	 * softirq. A softirq handled, such as network RX, might set PF_MEMALLOC
	 * again if the socket is related to swapping.
	 */
	current->flags &= ~PF_MEMALLOC;

	pending = local_softirq_pending();
	/* May move some bits from 'pending' into 'deferred' (RT-aware config only). */
	deferred = softirq_deferred_for_rt(&pending);

	softirq_handle_begin();

	in_hardirq = lockdep_softirq_start();
	account_softirq_enter(current);

restart:
	/* Reset the pending bitmask before enabling irqs */
	/* The deferred bits stay pending; everything else is cleared. */
	set_softirq_pending(deferred);
	set_active_softirqs(pending);

	local_irq_enable();

	h = softirq_vec;

	/*
	 * Walk the set bits from lowest to highest. 'h' is advanced to the
	 * vector of the current bit and 'pending' is shifted past it, so each
	 * ffs() result is relative to the position reached so far.
	 */
	while ((softirq_bit = ffs(pending))) {
		unsigned int vec_nr;
		int prev_count;

		h += softirq_bit - 1;

		vec_nr = h - softirq_vec;
		prev_count = preempt_count();

		kstat_incr_softirqs_this_cpu(vec_nr);

		trace_softirq_entry(vec_nr);
		h->action(h);
		trace_softirq_exit(vec_nr);
		/* A handler must leave preempt_count as it found it; repair and report otherwise. */
		if (unlikely(prev_count != preempt_count())) {
			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
			       vec_nr, softirq_to_name[vec_nr], h->action,
			       prev_count, preempt_count());
			preempt_count_set(prev_count);
		}
		h++;
		pending >>= softirq_bit;
	}

	set_active_softirqs(0);
	/* Only when running as this CPU's ksoftirqd on a !RT kernel. */
	if (!IS_ENABLED(CONFIG_PREEMPT_RT) &&
	    __this_cpu_read(ksoftirqd) == current)
		rcu_softirq_qs();

	local_irq_disable();

	/* Pick up whatever was raised while the handlers ran. */
	pending = local_softirq_pending();
	deferred = softirq_deferred_for_rt(&pending);

	if (pending) {
		/* Restart only within the time budget, without a resched request, and with restarts left. */
		if (time_before(jiffies, end) && !need_resched() &&
		    --max_restart)
			goto restart;
	}

	/* Anything still pending or deferred is left to ksoftirqd. */
	if (pending | deferred)
		wakeup_softirqd();

	account_softirq_exit(current);
	lockdep_softirq_end(in_hardirq);
	softirq_handle_end();
	/* Restore PF_MEMALLOC to the state saved on entry. */
	current_restore_flags(old_flags, PF_MEMALLOC);
}
653 
/**
 * irq_enter_rcu - Enter an interrupt context with RCU watching
 *
 * Calls tick_irq_enter() on nohz_full CPUs, or when the idle task is
 * interrupted and this is the outermost hardirq level, and starts
 * hardirq time accounting for the current task.
 */
void irq_enter_rcu(void)
{
	__irq_enter_raw();

	/* irq_count() == HARDIRQ_OFFSET: exactly one hardirq level, no softirq count. */
	if (tick_nohz_full_cpu(smp_processor_id()) ||
	    (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET)))
		tick_irq_enter();

	account_hardirq_enter(current);
}
667 
/**
 * irq_enter - Enter an interrupt context including RCU update
 *
 * Calls ct_irq_enter() and then does everything irq_enter_rcu() does.
 */
void irq_enter(void)
{
	ct_irq_enter();
	irq_enter_rcu();
}
676 
/*
 * Tick handling on interrupt exit. Compiles to nothing without
 * CONFIG_NO_HZ_COMMON.
 */
static inline void tick_irq_exit(void)
{
#ifdef CONFIG_NO_HZ_COMMON
	int cpu = smp_processor_id();
	bool propagate;

	/* Make sure that timer wheel updates are propagated */
	propagate = (idle_cpu(cpu) && !need_resched()) ||
		    tick_nohz_full_cpu(cpu);

	if (propagate && !in_hardirq())
		tick_nohz_irq_exit();
#endif
}
689 
/*
 * Common part of irq_exit_rcu() and irq_exit(): stop hardirq accounting,
 * drop HARDIRQ_OFFSET from the preempt count, process pending softirqs when
 * no longer in interrupt context, and do the tick exit handling.
 */
static inline void __irq_exit_rcu(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
	local_irq_disable();
#else
	/* The architecture promises to call this with interrupts already off. */
	lockdep_assert_irqs_disabled();
#endif
	account_hardirq_exit(current);
	preempt_count_sub(HARDIRQ_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

	tick_irq_exit();
}
704 
/**
 * irq_exit_rcu() - Exit an interrupt context without updating RCU
 *
 * Also processes softirqs if needed and possible.
 * The lockdep hardirq context is left as the very last step.
 */
void irq_exit_rcu(void)
{
	__irq_exit_rcu();
	 /* must be last! */
	lockdep_hardirq_exit();
}
716 
/**
 * irq_exit - Exit an interrupt context, update RCU and lockdep
 *
 * Also processes softirqs if needed and possible.
 * ct_irq_exit() is called after the common exit work; the lockdep hardirq
 * context is left as the very last step.
 */
void irq_exit(void)
{
	__irq_exit_rcu();
	ct_irq_exit();
	 /* must be last! */
	lockdep_hardirq_exit();
}
729 
/*
 * Mark softirq @nr pending and, if needed, wake ksoftirqd.
 *
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * Inside an interrupt or softirq (which also covers code running
	 * with softirqs disabled) nothing more is needed: the softirq runs
	 * once we return from that irq or softirq.
	 */
	if (in_interrupt() || !should_wake_ksoftirqd())
		return;

	/* Otherwise get ksoftirqd going so the softirq is handled soon. */
	wakeup_softirqd();
}
749 
/*
 * Mark softirq @nr pending. Unlike raise_softirq_irqoff() this may be called
 * with interrupts enabled: they are disabled around the call and the previous
 * state is restored afterwards.
 */
void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(raise_softirq);
759 
/*
 * Set bit @nr in this CPU's pending softirq mask and emit the softirq_raise
 * tracepoint. Never wakes ksoftirqd. Interrupts must be disabled.
 */
void __raise_softirq_irqoff(unsigned int nr)
{
	lockdep_assert_irqs_disabled();
	trace_softirq_raise(nr);
	or_softirq_pending(1UL << nr);
}
766 
/*
 * Install @action as the handler for softirq number @nr.
 * @nr is used as an index into softirq_vec without a range check.
 */
void open_softirq(int nr, void (*action)(struct softirq_action *))
{
	softirq_vec[nr].action = action;
}
771 
772 /*
773  * Tasklets
774  */
/* Singly linked list of tasklets with a tail pointer for appending. */
struct tasklet_head {
	struct tasklet_struct *head;	/* first queued tasklet, NULL when empty */
	struct tasklet_struct **tail;	/* where the next tasklet is linked in; &head when empty */
};
779 
/* Per-CPU tasklet lists: tasklet_vec for TASKLET_SOFTIRQ, tasklet_hi_vec for HI_SOFTIRQ. */
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
782 
/*
 * Append @t to this CPU's instance of the per-CPU list @headp and raise
 * softirq @softirq_nr. Runs with interrupts disabled and restores the
 * caller's interrupt state before returning.
 */
static void __tasklet_schedule_common(struct tasklet_struct *t,
				      struct tasklet_head __percpu *headp,
				      unsigned int softirq_nr)
{
	struct tasklet_head *queue;
	unsigned long irqflags;

	local_irq_save(irqflags);

	queue = this_cpu_ptr(headp);

	/* Link @t in at the tail and make its next pointer the new tail. */
	t->next = NULL;
	*queue->tail = t;
	queue->tail = &t->next;

	raise_softirq_irqoff(softirq_nr);

	local_irq_restore(irqflags);
}
798 
__tasklet_schedule(struct tasklet_struct * t)799 void __tasklet_schedule(struct tasklet_struct *t)
800 {
801 	__tasklet_schedule_common(t, &tasklet_vec,
802 				  TASKLET_SOFTIRQ);
803 }
804 EXPORT_SYMBOL(__tasklet_schedule);
805 
__tasklet_hi_schedule(struct tasklet_struct * t)806 void __tasklet_hi_schedule(struct tasklet_struct *t)
807 {
808 	__tasklet_schedule_common(t, &tasklet_hi_vec,
809 				  HI_SOFTIRQ);
810 }
811 EXPORT_SYMBOL(__tasklet_hi_schedule);
812 
tasklet_clear_sched(struct tasklet_struct * t)813 static bool tasklet_clear_sched(struct tasklet_struct *t)
814 {
815 	if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) {
816 		wake_up_var(&t->state);
817 		return true;
818 	}
819 
820 	WARN_ONCE(1, "tasklet SCHED state not set: %s %pS\n",
821 		  t->use_callback ? "callback" : "func",
822 		  t->use_callback ? (void *)t->callback : (void *)t->func);
823 
824 	return false;
825 }
826 
tasklet_action_common(struct softirq_action * a,struct tasklet_head * tl_head,unsigned int softirq_nr)827 static void tasklet_action_common(struct softirq_action *a,
828 				  struct tasklet_head *tl_head,
829 				  unsigned int softirq_nr)
830 {
831 	struct tasklet_struct *list;
832 
833 	local_irq_disable();
834 	list = tl_head->head;
835 	tl_head->head = NULL;
836 	tl_head->tail = &tl_head->head;
837 	local_irq_enable();
838 
839 	while (list) {
840 		struct tasklet_struct *t = list;
841 
842 		list = list->next;
843 
844 		if (tasklet_trylock(t)) {
845 			if (!atomic_read(&t->count)) {
846 				if (tasklet_clear_sched(t)) {
847 					if (t->use_callback) {
848 						trace_tasklet_entry(t->callback);
849 						t->callback(t);
850 						trace_tasklet_exit(t->callback);
851 					} else {
852 						trace_tasklet_entry(t->func);
853 						t->func(t->data);
854 						trace_tasklet_exit(t->func);
855 					}
856 				}
857 				tasklet_unlock(t);
858 				continue;
859 			}
860 			tasklet_unlock(t);
861 		}
862 
863 		local_irq_disable();
864 		t->next = NULL;
865 		*tl_head->tail = t;
866 		tl_head->tail = &t->next;
867 		__raise_softirq_irqoff(softirq_nr);
868 		local_irq_enable();
869 	}
870 }
871 
/* TASKLET_SOFTIRQ handler (see softirq_init()): runs this CPU's tasklet_vec list. */
static __latent_entropy void tasklet_action(struct softirq_action *a)
{
	tasklet_action_common(a, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
}
876 
/* HI_SOFTIRQ handler (see softirq_init()): runs this CPU's tasklet_hi_vec list. */
static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
{
	tasklet_action_common(a, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
}
881 
tasklet_setup(struct tasklet_struct * t,void (* callback)(struct tasklet_struct *))882 void tasklet_setup(struct tasklet_struct *t,
883 		   void (*callback)(struct tasklet_struct *))
884 {
885 	t->next = NULL;
886 	t->state = 0;
887 	atomic_set(&t->count, 0);
888 	t->callback = callback;
889 	t->use_callback = true;
890 	t->data = 0;
891 }
892 EXPORT_SYMBOL(tasklet_setup);
893 
tasklet_init(struct tasklet_struct * t,void (* func)(unsigned long),unsigned long data)894 void tasklet_init(struct tasklet_struct *t,
895 		  void (*func)(unsigned long), unsigned long data)
896 {
897 	t->next = NULL;
898 	t->state = 0;
899 	atomic_set(&t->count, 0);
900 	t->func = func;
901 	t->use_callback = false;
902 	t->data = data;
903 }
904 EXPORT_SYMBOL(tasklet_init);
905 
906 #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
907 /*
908  * Do not use in new code. Waiting for tasklets from atomic contexts is
909  * error prone and should be avoided.
910  */
tasklet_unlock_spin_wait(struct tasklet_struct * t)911 void tasklet_unlock_spin_wait(struct tasklet_struct *t)
912 {
913 	while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
914 		if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
915 			/*
916 			 * Prevent a live lock when current preempted soft
917 			 * interrupt processing or prevents ksoftirqd from
918 			 * running. If the tasklet runs on a different CPU
919 			 * then this has no effect other than doing the BH
920 			 * disable/enable dance for nothing.
921 			 */
922 			local_bh_disable();
923 			local_bh_enable();
924 		} else {
925 			cpu_relax();
926 		}
927 	}
928 }
929 EXPORT_SYMBOL(tasklet_unlock_spin_wait);
930 #endif
931 
/*
 * Wait until @t is neither scheduled nor running and leave it with the
 * SCHED bit clear. Prints a notice when called from interrupt context.
 */
void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		pr_notice("Attempt to kill tasklet from interrupt\n");

	/* Claim the SCHED bit ourselves; while it is already set, wait for it to clear. */
	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
		wait_var_event(&t->state, !test_bit(TASKLET_STATE_SCHED, &t->state));

	/* Wait for a run in progress to finish, then release the SCHED bit taken above. */
	tasklet_unlock_wait(t);
	tasklet_clear_sched(t);
}
EXPORT_SYMBOL(tasklet_kill);
944 
945 #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
/*
 * Clear TASKLET_STATE_RUN on @t, with barriers on both sides of the bit
 * operation, and wake anyone waiting on &t->state.
 */
void tasklet_unlock(struct tasklet_struct *t)
{
	smp_mb__before_atomic();
	clear_bit(TASKLET_STATE_RUN, &t->state);
	smp_mb__after_atomic();
	wake_up_var(&t->state);
}
EXPORT_SYMBOL_GPL(tasklet_unlock);
954 
/* Wait on &t->state until TASKLET_STATE_RUN is clear on @t. */
void tasklet_unlock_wait(struct tasklet_struct *t)
{
	wait_var_event(&t->state, !test_bit(TASKLET_STATE_RUN, &t->state));
}
EXPORT_SYMBOL_GPL(tasklet_unlock_wait);
960 #endif
961 
softirq_init(void)962 void __init softirq_init(void)
963 {
964 	int cpu;
965 
966 	for_each_possible_cpu(cpu) {
967 		per_cpu(tasklet_vec, cpu).tail =
968 			&per_cpu(tasklet_vec, cpu).head;
969 		per_cpu(tasklet_hi_vec, cpu).tail =
970 			&per_cpu(tasklet_hi_vec, cpu).head;
971 	}
972 
973 	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
974 	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
975 }
976 
/*
 * thread_should_run callback of softirq_threads: non-zero when softirqs are
 * pending on the local CPU. @cpu is not used.
 */
static int ksoftirqd_should_run(unsigned int cpu)
{
	return local_softirq_pending();
}
981 
/*
 * thread_fn callback of softirq_threads: process pending softirqs, if any,
 * and offer a reschedule afterwards. @cpu is not used.
 */
static void run_ksoftirqd(unsigned int cpu)
{
	ksoftirqd_run_begin();

	if (!local_softirq_pending()) {
		ksoftirqd_run_end();
		return;
	}

	/*
	 * Running softirqs inline is fine here, since we are not deep in
	 * the task stack.
	 */
	__do_softirq();
	ksoftirqd_run_end();
	cond_resched();
}
997 
998 #ifdef CONFIG_HOTPLUG_CPU
takeover_tasklets(unsigned int cpu)999 static int takeover_tasklets(unsigned int cpu)
1000 {
1001 	/* CPU is dead, so no lock needed. */
1002 	local_irq_disable();
1003 
1004 	/* Find end, append list for that CPU. */
1005 	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
1006 		*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
1007 		__this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
1008 		per_cpu(tasklet_vec, cpu).head = NULL;
1009 		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
1010 	}
1011 	raise_softirq_irqoff(TASKLET_SOFTIRQ);
1012 
1013 	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
1014 		*__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
1015 		__this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
1016 		per_cpu(tasklet_hi_vec, cpu).head = NULL;
1017 		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
1018 	}
1019 	raise_softirq_irqoff(HI_SOFTIRQ);
1020 
1021 	local_irq_enable();
1022 	return 0;
1023 }
1024 #else
1025 #define takeover_tasklets	NULL
1026 #endif /* CONFIG_HOTPLUG_CPU */
1027 
/* Descriptor for the per-CPU "ksoftirqd/%u" threads, registered in spawn_ksoftirqd(). */
static struct smp_hotplug_thread softirq_threads = {
	.store			= &ksoftirqd,
	.thread_should_run	= ksoftirqd_should_run,
	.thread_fn		= run_ksoftirqd,
	.thread_comm		= "ksoftirqd/%u",
};
1034 
/*
 * Early initcall: hook takeover_tasklets() into the CPUHP_SOFTIRQ_DEAD
 * hotplug state and register the per-CPU ksoftirqd threads. A failure to
 * register the threads is fatal (BUG). Always returns 0.
 */
static __init int spawn_ksoftirqd(void)
{
	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
				  takeover_tasklets);
	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));

	return 0;
}
early_initcall(spawn_ksoftirqd);
1044 
1045 /*
1046  * [ These __weak aliases are kept in a separate compilation unit, so that
1047  *   GCC does not inline them incorrectly. ]
1048  */
1049 
/* Weak default: does nothing and returns 0. */
int __init __weak early_irq_init(void)
{
	return 0;
}
1054 
/* Weak default: returns NR_IRQS_LEGACY. */
int __init __weak arch_probe_nr_irqs(void)
{
	return NR_IRQS_LEGACY;
}
1059 
/* Weak default: does nothing and returns 0. */
int __init __weak arch_early_irq_init(void)
{
	return 0;
}
1064 
/* Weak default: returns @from unchanged. */
unsigned int __weak arch_dynirq_lower_bound(unsigned int from)
{
	return from;
}
1069