1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * linux/kernel/softirq.c
4 *
5 * Copyright (C) 1992 Linus Torvalds
6 *
7 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
8 */
9
10 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11
12 #include <linux/export.h>
13 #include <linux/kernel_stat.h>
14 #include <linux/interrupt.h>
15 #include <linux/init.h>
16 #include <linux/local_lock.h>
17 #include <linux/mm.h>
18 #include <linux/notifier.h>
19 #include <linux/percpu.h>
20 #include <linux/cpu.h>
21 #include <linux/freezer.h>
22 #include <linux/kthread.h>
23 #include <linux/rcupdate.h>
24 #include <linux/ftrace.h>
25 #include <linux/smp.h>
26 #include <linux/smpboot.h>
27 #include <linux/tick.h>
28 #include <linux/irq.h>
29 #include <linux/wait_bit.h>
30
31 #include <asm/softirq_stack.h>
32
33 #define CREATE_TRACE_POINTS
34 #include <trace/events/irq.h>
35
/* Export the irq handler, softirq and tasklet tracepoints (GPL-only). */
EXPORT_TRACEPOINT_SYMBOL_GPL(irq_handler_entry);
EXPORT_TRACEPOINT_SYMBOL_GPL(irq_handler_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(softirq_entry);
EXPORT_TRACEPOINT_SYMBOL_GPL(softirq_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(tasklet_entry);
EXPORT_TRACEPOINT_SYMBOL_GPL(tasklet_exit);
42
43 /*
44 - No shared variables, all the data are CPU local.
45 - If a softirq needs serialization, let it serialize itself
46 by its own spinlocks.
- Even if a softirq is serialized, only the local CPU is marked for
  execution. Hence, we get a sort of weak CPU binding. It is still
  not clear whether this results in better locality or not.
51
52 Examples:
53 - NET RX softirq. It is multithreaded and does not require
54 any global serialization.
55 - NET TX softirq. It kicks software netdevice queues, hence
56 it is logically serialized per device, but this serialization
57 is invisible to common code.
58 - Tasklets: serialized wrt itself.
59 */
60
/*
 * Per-CPU irq_cpustat_t instance. Only defined here when the architecture
 * does not provide its own (i.e. __ARCH_IRQ_STAT is not set).
 */
#ifndef __ARCH_IRQ_STAT
DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
#endif
65
/*
 * Handler table with one slot per softirq number. Slots are filled in by
 * open_softirq() and walked by __do_softirq().
 */
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
67
/*
 * Per-CPU pointer to that CPU's ksoftirqd thread. It is the ->store target
 * of softirq_threads below; it can be NULL, which wakeup_softirqd() and
 * invoke_softirq() check for.
 */
DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
EXPORT_PER_CPU_SYMBOL_GPL(ksoftirqd);
70
/*
 * active_softirqs -- per cpu, a mask of softirqs that are being handled,
 * with the expectation that approximate answers are acceptable and therefore
 * no synchronization.
 *
 * __do_softirq() stores the mask it is about to process here before it
 * enables interrupts and resets it to 0 once the handlers have run.
 */
DEFINE_PER_CPU(__u32, active_softirqs);
77
/*
 * Human readable softirq names, indexed by softirq number (used e.g. by the
 * preempt_count diagnostic in __do_softirq()).
 * NOTE(review): the order presumably has to match the softirq number enum
 * declared elsewhere -- keep the two in sync.
 */
const char * const softirq_to_name[NR_SOFTIRQS] = {
	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
	"TASKLET", "SCHED", "HRTIMER", "RCU"
};
82
83 /*
84 * we cannot loop indefinitely here to avoid userspace starvation,
85 * but we also don't want to introduce a worst case 1/HZ latency
86 * to the pending events, so lets the scheduler to balance
87 * the softirq load for us.
88 */
wakeup_softirqd(void)89 static void wakeup_softirqd(void)
90 {
91 /* Interrupts are disabled: no need to stop preemption */
92 struct task_struct *tsk = __this_cpu_read(ksoftirqd);
93
94 if (tsk)
95 wake_up_process(tsk);
96 }
97
#ifdef CONFIG_TRACE_IRQFLAGS
/* Per-CPU hardirq state variables, only built with CONFIG_TRACE_IRQFLAGS. */
DEFINE_PER_CPU(int, hardirqs_enabled);
DEFINE_PER_CPU(int, hardirq_context);
EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled);
EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context);
#endif
104
105 /*
106 * SOFTIRQ_OFFSET usage:
107 *
108 * On !RT kernels 'count' is the preempt counter, on RT kernels this applies
109 * to a per CPU counter and to task::softirqs_disabled_cnt.
110 *
111 * - count is changed by SOFTIRQ_OFFSET on entering or leaving softirq
112 * processing.
113 *
114 * - count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
115 * on local_bh_disable or local_bh_enable.
116 *
117 * This lets us distinguish between whether we are currently processing
118 * softirq and whether we just have bh disabled.
119 */
120 #ifdef CONFIG_PREEMPT_RT
121
122 /*
123 * RT accounts for BH disabled sections in task::softirqs_disabled_cnt and
124 * also in per CPU softirq_ctrl::cnt. This is necessary to allow tasks in a
125 * softirq disabled section to be preempted.
126 *
 * The per task counter is used for softirq_count(), in_softirq() and
 * in_serving_softirq() because these counts are only valid when the task
 * holding softirq_ctrl::lock is running.
130 *
131 * The per CPU counter prevents pointless wakeups of ksoftirqd in case that
132 * the task which is in a softirq disabled section is preempted or blocks.
133 */
struct softirq_ctrl {
	/* Taken on a task's first BH disable when called from preemptible context */
	local_lock_t lock;
	/* Per CPU BH disable count, mirrored into current->softirq_disable_cnt */
	int cnt;
};
138
/* One control block per CPU; only the lock needs an explicit initializer. */
static DEFINE_PER_CPU(struct softirq_ctrl, softirq_ctrl) = {
	.lock = INIT_LOCAL_LOCK(softirq_ctrl.lock),
};
142
143 /**
144 * local_bh_blocked() - Check for idle whether BH processing is blocked
145 *
146 * Returns false if the per CPU softirq::cnt is 0 otherwise true.
147 *
148 * This is invoked from the idle task to guard against false positive
149 * softirq pending warnings, which would happen when the task which holds
150 * softirq_ctrl::lock was the only running task on the CPU and blocks on
151 * some other lock.
152 */
local_bh_blocked(void)153 bool local_bh_blocked(void)
154 {
155 return __this_cpu_read(softirq_ctrl.cnt) != 0;
156 }
157
/*
 * __local_bh_disable_ip - PREEMPT_RT implementation of BH disable
 * @ip:  caller address, passed on to lockdep_softirqs_off()
 * @cnt: amount added to the per CPU and the per task BH disable counter
 *
 * Warns once when invoked from hard interrupt context.
 */
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
	unsigned long flags;
	int newcnt;

	WARN_ON_ONCE(in_hardirq());

	/* First entry of a task into a BH disabled section? */
	if (!current->softirq_disable_cnt) {
		if (preemptible()) {
			local_lock(&softirq_ctrl.lock);
			/* Required to meet the RCU bottomhalf requirements. */
			rcu_read_lock();
		} else {
			/*
			 * Non-preemptible callers do not take the lock; the
			 * per CPU counter is expected to be zero here.
			 */
			DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt));
		}
	}

	/*
	 * Track the per CPU softirq disabled state. On RT this is per CPU
	 * state to allow preemption of bottom half disabled sections.
	 */
	newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt);
	/*
	 * Reflect the result in the task state to prevent recursion on the
	 * local lock and to make softirq_count() & al work.
	 */
	current->softirq_disable_cnt = newcnt;

	/* Outermost disable (counter went from 0 to cnt): tell lockdep. */
	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) {
		raw_local_irq_save(flags);
		lockdep_softirqs_off(ip);
		raw_local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(__local_bh_disable_ip);
194
/*
 * __local_bh_enable - PREEMPT_RT helper dropping @cnt from the BH counters
 * @cnt:    amount subtracted from the per CPU and the per task counter
 * @unlock: when true and the counter reaches zero, leave the RCU read side
 *          section and release softirq_ctrl.lock (the counterpart of the
 *          preemptible path in __local_bh_disable_ip())
 */
static void __local_bh_enable(unsigned int cnt, bool unlock)
{
	unsigned long flags;
	int newcnt;

	/* The per task and the per CPU view must agree at this point. */
	DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt !=
			    this_cpu_read(softirq_ctrl.cnt));

	/* Tell lockdep when the softirq count equals what is dropped below. */
	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) {
		raw_local_irq_save(flags);
		lockdep_softirqs_on(_RET_IP_);
		raw_local_irq_restore(flags);
	}

	newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt);
	current->softirq_disable_cnt = newcnt;

	if (!newcnt && unlock) {
		rcu_read_unlock();
		local_unlock(&softirq_ctrl.lock);
	}
}
217
__local_bh_enable_ip(unsigned long ip,unsigned int cnt)218 void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
219 {
220 bool preempt_on = preemptible();
221 unsigned long flags;
222 u32 pending;
223 int curcnt;
224
225 WARN_ON_ONCE(in_irq());
226 lockdep_assert_irqs_enabled();
227
228 local_irq_save(flags);
229 curcnt = __this_cpu_read(softirq_ctrl.cnt);
230
231 /*
232 * If this is not reenabling soft interrupts, no point in trying to
233 * run pending ones.
234 */
235 if (curcnt != cnt)
236 goto out;
237
238 pending = local_softirq_pending();
239 if (!pending)
240 goto out;
241
242 /*
243 * If this was called from non preemptible context, wake up the
244 * softirq daemon.
245 */
246 if (!preempt_on) {
247 wakeup_softirqd();
248 goto out;
249 }
250
251 /*
252 * Adjust softirq count to SOFTIRQ_OFFSET which makes
253 * in_serving_softirq() become true.
254 */
255 cnt = SOFTIRQ_OFFSET;
256 __local_bh_enable(cnt, false);
257 __do_softirq();
258
259 out:
260 __local_bh_enable(cnt, preempt_on);
261 local_irq_restore(flags);
262 }
263 EXPORT_SYMBOL(__local_bh_enable_ip);
264
265 /*
266 * Invoked from ksoftirqd_run() outside of the interrupt disabled section
267 * to acquire the per CPU local lock for reentrancy protection.
268 */
static inline void ksoftirqd_run_begin(void)
{
	/*
	 * Enter the softirq serving state first, while interrupts are still
	 * enabled: this may acquire softirq_ctrl.lock.
	 */
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
	local_irq_disable();
}
274
/*
 * Counterpart to ksoftirqd_run_begin(): drop SOFTIRQ_OFFSET again, release
 * the lock (unlock == true) and re-enable interrupts.
 */
static inline void ksoftirqd_run_end(void)
{
	__local_bh_enable(SOFTIRQ_OFFSET, true);
	/* ksoftirqd must not be left in interrupt/BH-disabled context. */
	WARN_ON_ONCE(in_interrupt());
	local_irq_enable();
}
282
/*
 * Nothing to do on RT: the callers of __do_softirq() (__local_bh_enable_ip()
 * and ksoftirqd via ksoftirqd_run_begin()) have already set up the
 * SOFTIRQ_OFFSET accounting.
 */
static inline void softirq_handle_begin(void) { }
static inline void softirq_handle_end(void) { }
285
should_wake_ksoftirqd(void)286 static inline bool should_wake_ksoftirqd(void)
287 {
288 return !this_cpu_read(softirq_ctrl.cnt);
289 }
290
/*
 * RT variant: softirqs are never run directly from here; the work is
 * delegated to ksoftirqd, provided a wakeup makes sense at all.
 */
static inline void invoke_softirq(void)
{
	if (!should_wake_ksoftirqd())
		return;

	wakeup_softirqd();
}
296
297 #else /* CONFIG_PREEMPT_RT */
298
299 /*
300 * This one is for softirq.c-internal use, where hardirqs are disabled
301 * legitimately:
302 */
303 #ifdef CONFIG_TRACE_IRQFLAGS
__local_bh_disable_ip(unsigned long ip,unsigned int cnt)304 void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
305 {
306 unsigned long flags;
307
308 WARN_ON_ONCE(in_irq());
309
310 raw_local_irq_save(flags);
311 /*
312 * The preempt tracer hooks into preempt_count_add and will break
313 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
314 * is set and before current->softirq_enabled is cleared.
315 * We must manually increment preempt_count here and manually
316 * call the trace_preempt_off later.
317 */
318 __preempt_count_add(cnt);
319 /*
320 * Were softirqs turned off above:
321 */
322 if (softirq_count() == (cnt & SOFTIRQ_MASK))
323 lockdep_softirqs_off(ip);
324 raw_local_irq_restore(flags);
325
326 if (preempt_count() == cnt) {
327 #ifdef CONFIG_DEBUG_PREEMPT
328 current->preempt_disable_ip = get_lock_parent_ip();
329 #endif
330 trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
331 }
332 }
333 EXPORT_SYMBOL(__local_bh_disable_ip);
334 #endif /* CONFIG_TRACE_IRQFLAGS */
335
/*
 * __local_bh_enable - drop @cnt from the preempt counter without running
 * pending softirqs
 * @cnt: amount subtracted from the preempt counter
 *
 * Must be called with interrupts disabled. The tracer and lockdep are
 * notified before the count is actually lowered.
 */
static void __local_bh_enable(unsigned int cnt)
{
	lockdep_assert_irqs_disabled();

	/* Preempt count is about to reach zero. */
	if (preempt_count() == cnt)
		trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());

	/* Softirq part of the count is about to reach zero. */
	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		lockdep_softirqs_on(_RET_IP_);

	__preempt_count_sub(cnt);
}
348
349 /*
350 * Special-case - softirqs can safely be enabled by __do_softirq(),
351 * without processing still-pending softirqs:
352 */
_local_bh_enable(void)353 void _local_bh_enable(void)
354 {
355 WARN_ON_ONCE(in_irq());
356 __local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
357 }
358 EXPORT_SYMBOL(_local_bh_enable);
359
__local_bh_enable_ip(unsigned long ip,unsigned int cnt)360 void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
361 {
362 WARN_ON_ONCE(in_irq());
363 lockdep_assert_irqs_enabled();
364 #ifdef CONFIG_TRACE_IRQFLAGS
365 local_irq_disable();
366 #endif
367 /*
368 * Are softirqs going to be turned on now:
369 */
370 if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
371 lockdep_softirqs_on(ip);
372 /*
373 * Keep preemption disabled until we are done with
374 * softirq processing:
375 */
376 __preempt_count_sub(cnt - 1);
377
378 if (unlikely(!in_interrupt() && local_softirq_pending())) {
379 /*
380 * Run softirq if any pending. And do it in its own stack
381 * as we may be calling this deep in a task call stack already.
382 */
383 do_softirq();
384 }
385
386 preempt_count_dec();
387 #ifdef CONFIG_TRACE_IRQFLAGS
388 local_irq_enable();
389 #endif
390 preempt_check_resched();
391 }
392 EXPORT_SYMBOL(__local_bh_enable_ip);
393
/* Called by __do_softirq(): account SOFTIRQ_OFFSET for the serving state. */
static inline void softirq_handle_begin(void)
{
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
}
398
/* Counterpart to softirq_handle_begin(): drop SOFTIRQ_OFFSET again. */
static inline void softirq_handle_end(void)
{
	__local_bh_enable(SOFTIRQ_OFFSET);
	/* No interrupt/BH-disabled context may be left over at this point. */
	WARN_ON_ONCE(in_interrupt());
}
404
/* !RT: ksoftirqd only needs interrupts disabled around its pending check. */
static inline void ksoftirqd_run_begin(void)
{
	local_irq_disable();
}
409
/* Counterpart to ksoftirqd_run_begin(). */
static inline void ksoftirqd_run_end(void)
{
	local_irq_enable();
}
414
/* !RT: there is no per CPU BH counter to consult, waking is always allowed. */
static inline bool should_wake_ksoftirqd(void)
{
	return true;
}
419
invoke_softirq(void)420 static inline void invoke_softirq(void)
421 {
422 if (!force_irqthreads() || !__this_cpu_read(ksoftirqd)) {
423 #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
424 /*
425 * We can safely execute softirq on the current stack if
426 * it is the irq stack, because it should be near empty
427 * at this stage.
428 */
429 __do_softirq();
430 #else
431 /*
432 * Otherwise, irq_exit() is called on the task stack that can
433 * be potentially deep already. So call softirq in its own stack
434 * to prevent from any overrun.
435 */
436 do_softirq_own_stack();
437 #endif
438 } else {
439 wakeup_softirqd();
440 }
441 }
442
do_softirq(void)443 asmlinkage __visible void do_softirq(void)
444 {
445 __u32 pending;
446 unsigned long flags;
447
448 if (in_interrupt())
449 return;
450
451 local_irq_save(flags);
452
453 pending = local_softirq_pending();
454
455 if (pending)
456 do_softirq_own_stack();
457
458 local_irq_restore(flags);
459 }
460
461 #endif /* !CONFIG_PREEMPT_RT */
462
463 /*
464 * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
465 * but break the loop if need_resched() is set or after 2 ms.
 * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
 * certain cases, such as stop_machine(), jiffies may cease to
 * increment and so we need the MAX_SOFTIRQ_RESTART limit as
 * well to make sure we eventually return from this function.
470 *
471 * These limits have been established via experimentation.
472 * The two things to balance is latency against fairness -
473 * we want to handle softirqs as soon as possible, but they
474 * should not be able to lock up the box.
475 */
476 #define MAX_SOFTIRQ_TIME msecs_to_jiffies(2)
477 #define MAX_SOFTIRQ_RESTART 10
478
479 #ifdef CONFIG_TRACE_IRQFLAGS
480 /*
 * When we run softirqs from irq_exit() and thus on the hardirq stack we need
 * to keep the lockdep irq context tracking as tight as possible in order to
 * not misqualify lock contexts and miss possible deadlocks.
484 */
485
lockdep_softirq_start(void)486 static inline bool lockdep_softirq_start(void)
487 {
488 bool in_hardirq = false;
489
490 if (lockdep_hardirq_context()) {
491 in_hardirq = true;
492 lockdep_hardirq_exit();
493 }
494
495 lockdep_softirq_enter();
496
497 return in_hardirq;
498 }
499
/*
 * Leave the lockdep softirq context and, if lockdep_softirq_start() left a
 * hardirq context (@in_hardirq), enter it again.
 */
static inline void lockdep_softirq_end(bool in_hardirq)
{
	lockdep_softirq_exit();

	if (!in_hardirq)
		return;

	lockdep_hardirq_enter();
}
507 #else
/* !CONFIG_TRACE_IRQFLAGS: no lockdep irq context tracking, empty stubs. */
static inline bool lockdep_softirq_start(void) { return false; }
static inline void lockdep_softirq_end(bool in_hardirq) { }
510 #endif
511
/*
 * softirq_deferred_for_rt - split off softirqs that should not run now
 * @pending: in/out mask of pending softirqs
 *
 * When cpupri_check_rt() reports true, the bits covered by
 * LONG_SOFTIRQ_MASK are removed from *@pending and returned to the caller.
 * Otherwise *@pending is left untouched and 0 is returned.
 */
static inline __u32 softirq_deferred_for_rt(__u32 *pending)
{
	__u32 held_back;

	if (!cpupri_check_rt())
		return 0;

	held_back = *pending & LONG_SOFTIRQ_MASK;
	*pending &= ~LONG_SOFTIRQ_MASK;

	return held_back;
}
522
/*
 * __do_softirq - run the handlers of all pending softirqs on this CPU
 *
 * Entered with interrupts disabled. Interrupts are enabled while the
 * handlers run and disabled again before the pending mask is re-read.
 * Processing restarts while new softirqs show up, bounded by
 * MAX_SOFTIRQ_TIME, MAX_SOFTIRQ_RESTART and need_resched(). Whatever is
 * left over, including softirqs held back by softirq_deferred_for_rt(),
 * is handed to ksoftirqd.
 */
asmlinkage __visible void __softirq_entry __do_softirq(void)
{
	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
	unsigned long old_flags = current->flags;
	int max_restart = MAX_SOFTIRQ_RESTART;
	struct softirq_action *h;
	bool in_hardirq;
	__u32 deferred;
	__u32 pending;
	int softirq_bit;

	/*
	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
	 * softirq. A softirq handler, such as network RX, might set PF_MEMALLOC
	 * again if the socket is related to swapping.
	 */
	current->flags &= ~PF_MEMALLOC;

	pending = local_softirq_pending();

	/* Split the mask: 'pending' runs now, 'deferred' stays pending. */
	deferred = softirq_deferred_for_rt(&pending);
	softirq_handle_begin();
	in_hardirq = lockdep_softirq_start();
	account_softirq_enter(current);

restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(deferred);
	__this_cpu_write(active_softirqs, pending);

	local_irq_enable();

	h = softirq_vec;

	/*
	 * ffs() returns the 1-based position of the lowest set bit. Both h
	 * and pending are advanced past each handled bit, so every iteration
	 * works relative to the previous one.
	 */
	while ((softirq_bit = ffs(pending))) {
		unsigned int vec_nr;
		int prev_count;

		h += softirq_bit - 1;

		vec_nr = h - softirq_vec;
		prev_count = preempt_count();

		kstat_incr_softirqs_this_cpu(vec_nr);

		trace_softirq_entry(vec_nr);
		h->action(h);
		trace_softirq_exit(vec_nr);
		/* A handler must not leak a preempt count change: report and repair. */
		if (unlikely(prev_count != preempt_count())) {
			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
			       vec_nr, softirq_to_name[vec_nr], h->action,
			       prev_count, preempt_count());
			preempt_count_set(prev_count);
		}
		h++;
		pending >>= softirq_bit;
	}

	__this_cpu_write(active_softirqs, 0);
	/* Only when running in ksoftirqd on a !RT kernel. */
	if (!IS_ENABLED(CONFIG_PREEMPT_RT) &&
	    __this_cpu_read(ksoftirqd) == current)
		rcu_softirq_qs();

	local_irq_disable();

	/* Pick up what was raised meanwhile and split it again. */
	pending = local_softirq_pending();
	deferred = softirq_deferred_for_rt(&pending);

	if (pending) {
		/* Go around again only while all three budgets permit it. */
		if (time_before(jiffies, end) && !need_resched() &&
		    --max_restart)
			goto restart;
	}

	/* Leftover or deferred work is handed to ksoftirqd. */
	if (pending | deferred)
		wakeup_softirqd();

	account_softirq_exit(current);
	lockdep_softirq_end(in_hardirq);
	softirq_handle_end();
	/* Restore the PF_MEMALLOC state saved on entry. */
	current_restore_flags(old_flags, PF_MEMALLOC);
}
605
606 /**
607 * irq_enter_rcu - Enter an interrupt context with RCU watching
608 */
void irq_enter_rcu(void)
{
	__irq_enter_raw();

	/*
	 * Notify the tick code on nohz_full CPUs, or when the idle task is
	 * interrupted and irq_count() is exactly one hardirq level.
	 */
	if (tick_nohz_full_cpu(smp_processor_id()) ||
	    (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET)))
		tick_irq_enter();

	account_hardirq_enter(current);
}
619
620 /**
621 * irq_enter - Enter an interrupt context including RCU update
622 */
void irq_enter(void)
{
	/* RCU is informed first, then the common entry work is done. */
	rcu_irq_enter();
	irq_enter_rcu();
}
628
/*
 * Give the tick code a chance to do its interrupt exit work. Only relevant
 * with CONFIG_NO_HZ_COMMON, and only for CPUs that are idle without a
 * pending reschedule or that are nohz_full.
 */
static inline void tick_irq_exit(void)
{
#ifdef CONFIG_NO_HZ_COMMON
	int cpu = smp_processor_id();

	/* Make sure that timer wheel updates are propagated */
	if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
		/* in_hardirq() replaces the obsolete in_irq() alias. */
		if (!in_hardirq())
			tick_nohz_irq_exit();
	}
#endif
}
641
/*
 * Common hardirq exit work shared by irq_exit_rcu() and irq_exit(): with
 * interrupts disabled, account the exit, drop HARDIRQ_OFFSET from the
 * preempt count, kick softirq processing if some is pending and we are no
 * longer in interrupt context, then let the tick code do its exit work.
 */
static inline void __irq_exit_rcu(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
	local_irq_disable();
#else
	/* The architecture promises to call us with interrupts disabled. */
	lockdep_assert_irqs_disabled();
#endif
	account_hardirq_exit(current);
	preempt_count_sub(HARDIRQ_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

	tick_irq_exit();
}
656
657 /**
658 * irq_exit_rcu() - Exit an interrupt context without updating RCU
659 *
660 * Also processes softirqs if needed and possible.
661 */
void irq_exit_rcu(void)
{
	/* Common exit work; unlike irq_exit(), RCU is not updated here. */
	__irq_exit_rcu();
	 /* must be last! */
	lockdep_hardirq_exit();
}
668
669 /**
670 * irq_exit - Exit an interrupt context, update RCU and lockdep
671 *
672 * Also processes softirqs if needed and possible.
673 */
void irq_exit(void)
{
	/* Common exit work first, then tell RCU that the interrupt is over. */
	__irq_exit_rcu();
	rcu_irq_exit();
	 /* must be last! */
	lockdep_hardirq_exit();
}
681
682 /*
683 * This function must run with irqs disabled!
684 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * In hardirq or softirq context (and likewise in softirq-disabled
	 * code) there is nothing more to do: the softirq is run when that
	 * context is left.
	 */
	if (in_interrupt())
		return;

	/*
	 * Otherwise wake up ksoftirqd so that the softirq gets scheduled
	 * soon.
	 */
	if (should_wake_ksoftirqd())
		wakeup_softirqd();
}
701
/*
 * raise_softirq - mark softirq @nr pending on this CPU
 *
 * Wrapper around raise_softirq_irqoff() for callers that do not have
 * interrupts disabled already; the previous interrupt state is restored.
 */
void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}
710
/*
 * __raise_softirq_irqoff - set the pending bit of softirq @nr
 *
 * Only traces the event and ORs the bit into the pending mask; ksoftirqd is
 * not woken. Interrupts must be disabled.
 */
void __raise_softirq_irqoff(unsigned int nr)
{
	lockdep_assert_irqs_disabled();
	trace_softirq_raise(nr);
	or_softirq_pending(1UL << nr);
}
717
/*
 * open_softirq - install @action as the handler of softirq @nr
 *
 * @nr is used as an index into softirq_vec[] without a range check, so it
 * must be a valid softirq number (0 <= nr < NR_SOFTIRQS).
 */
void open_softirq(int nr, void (*action)(struct softirq_action *))
{
	softirq_vec[nr].action = action;
}
722
723 /*
724 * Tasklets
725 */
/*
 * Singly linked queue of tasklets. @tail points at the location that
 * receives the next appended tasklet: &head while the queue is empty,
 * otherwise the ->next field of the last element.
 */
struct tasklet_head {
	struct tasklet_struct *head;
	struct tasklet_struct **tail;
};

/* Per-CPU queues served by tasklet_action() and tasklet_hi_action(). */
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
733
/*
 * Append @t to this CPU's instance of the per-CPU queue @headp and raise
 * softirq @softirq_nr. Runs with interrupts disabled for the duration and
 * restores the previous interrupt state afterwards.
 */
static void __tasklet_schedule_common(struct tasklet_struct *t,
				      struct tasklet_head __percpu *headp,
				      unsigned int softirq_nr)
{
	struct tasklet_head *queue;
	unsigned long irqflags;

	local_irq_save(irqflags);

	/* Link the tasklet in at the tail of the local queue. */
	queue = this_cpu_ptr(headp);
	t->next = NULL;
	*queue->tail = t;
	queue->tail = &t->next;

	raise_softirq_irqoff(softirq_nr);

	local_irq_restore(irqflags);
}
749
/* Queue @t on this CPU's normal tasklet list and raise TASKLET_SOFTIRQ. */
void __tasklet_schedule(struct tasklet_struct *t)
{
	__tasklet_schedule_common(t, &tasklet_vec,
				  TASKLET_SOFTIRQ);
}
EXPORT_SYMBOL(__tasklet_schedule);
756
/* Queue @t on this CPU's high priority tasklet list and raise HI_SOFTIRQ. */
void __tasklet_hi_schedule(struct tasklet_struct *t)
{
	__tasklet_schedule_common(t, &tasklet_hi_vec,
				  HI_SOFTIRQ);
}
EXPORT_SYMBOL(__tasklet_hi_schedule);
763
tasklet_clear_sched(struct tasklet_struct * t)764 static bool tasklet_clear_sched(struct tasklet_struct *t)
765 {
766 if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) {
767 wake_up_var(&t->state);
768 return true;
769 }
770
771 WARN_ONCE(1, "tasklet SCHED state not set: %s %pS\n",
772 t->use_callback ? "callback" : "func",
773 t->use_callback ? (void *)t->callback : (void *)t->func);
774
775 return false;
776 }
777
tasklet_action_common(struct softirq_action * a,struct tasklet_head * tl_head,unsigned int softirq_nr)778 static void tasklet_action_common(struct softirq_action *a,
779 struct tasklet_head *tl_head,
780 unsigned int softirq_nr)
781 {
782 struct tasklet_struct *list;
783
784 local_irq_disable();
785 list = tl_head->head;
786 tl_head->head = NULL;
787 tl_head->tail = &tl_head->head;
788 local_irq_enable();
789
790 while (list) {
791 struct tasklet_struct *t = list;
792
793 list = list->next;
794
795 if (tasklet_trylock(t)) {
796 if (!atomic_read(&t->count)) {
797 if (tasklet_clear_sched(t)) {
798 if (t->use_callback) {
799 trace_tasklet_entry(t->callback);
800 t->callback(t);
801 trace_tasklet_exit(t->callback);
802 } else {
803 trace_tasklet_entry(t->func);
804 t->func(t->data);
805 trace_tasklet_exit(t->func);
806 }
807 }
808 tasklet_unlock(t);
809 continue;
810 }
811 tasklet_unlock(t);
812 }
813
814 local_irq_disable();
815 t->next = NULL;
816 *tl_head->tail = t;
817 tl_head->tail = &t->next;
818 __raise_softirq_irqoff(softirq_nr);
819 local_irq_enable();
820 }
821 }
822
/* TASKLET_SOFTIRQ handler: process this CPU's normal tasklet queue. */
static __latent_entropy void tasklet_action(struct softirq_action *a)
{
	tasklet_action_common(a, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
}
827
/* HI_SOFTIRQ handler: process this CPU's high priority tasklet queue. */
static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
{
	tasklet_action_common(a, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
}
832
tasklet_setup(struct tasklet_struct * t,void (* callback)(struct tasklet_struct *))833 void tasklet_setup(struct tasklet_struct *t,
834 void (*callback)(struct tasklet_struct *))
835 {
836 t->next = NULL;
837 t->state = 0;
838 atomic_set(&t->count, 0);
839 t->callback = callback;
840 t->use_callback = true;
841 t->data = 0;
842 }
843 EXPORT_SYMBOL(tasklet_setup);
844
tasklet_init(struct tasklet_struct * t,void (* func)(unsigned long),unsigned long data)845 void tasklet_init(struct tasklet_struct *t,
846 void (*func)(unsigned long), unsigned long data)
847 {
848 t->next = NULL;
849 t->state = 0;
850 atomic_set(&t->count, 0);
851 t->func = func;
852 t->use_callback = false;
853 t->data = data;
854 }
855 EXPORT_SYMBOL(tasklet_init);
856
857 #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
858 /*
859 * Do not use in new code. Waiting for tasklets from atomic contexts is
860 * error prone and should be avoided.
861 */
tasklet_unlock_spin_wait(struct tasklet_struct * t)862 void tasklet_unlock_spin_wait(struct tasklet_struct *t)
863 {
864 while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
865 if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
866 /*
867 * Prevent a live lock when current preempted soft
868 * interrupt processing or prevents ksoftirqd from
869 * running. If the tasklet runs on a different CPU
870 * then this has no effect other than doing the BH
871 * disable/enable dance for nothing.
872 */
873 local_bh_disable();
874 local_bh_enable();
875 } else {
876 cpu_relax();
877 }
878 }
879 }
880 EXPORT_SYMBOL(tasklet_unlock_spin_wait);
881 #endif
882
/*
 * tasklet_kill - wait until @t is neither scheduled nor running
 *
 * Claims TASKLET_STATE_SCHED for itself (waiting for an already scheduled
 * instance to clear it first), waits for a running instance to finish and
 * then releases the bit again. Because it waits, a notice is printed when
 * it is called from interrupt context.
 */
void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		pr_notice("Attempt to kill tasklet from interrupt\n");

	/* Loop until we are the one who set the SCHED bit. */
	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
		wait_var_event(&t->state, !test_bit(TASKLET_STATE_SCHED, &t->state));

	tasklet_unlock_wait(t);
	/* Drop the SCHED bit claimed above and wake other waiters. */
	tasklet_clear_sched(t);
}
EXPORT_SYMBOL(tasklet_kill);
895
896 #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
/*
 * tasklet_unlock - clear TASKLET_STATE_RUN of @t
 *
 * The bit is cleared with memory barriers on both sides, then waiters on
 * t->state (see tasklet_unlock_wait()) are woken.
 */
void tasklet_unlock(struct tasklet_struct *t)
{
	smp_mb__before_atomic();
	clear_bit(TASKLET_STATE_RUN, &t->state);
	smp_mb__after_atomic();
	wake_up_var(&t->state);
}
EXPORT_SYMBOL_GPL(tasklet_unlock);
905
/* Wait until TASKLET_STATE_RUN of @t is clear; woken by tasklet_unlock(). */
void tasklet_unlock_wait(struct tasklet_struct *t)
{
	wait_var_event(&t->state, !test_bit(TASKLET_STATE_RUN, &t->state));
}
EXPORT_SYMBOL_GPL(tasklet_unlock_wait);
911 #endif
912
softirq_init(void)913 void __init softirq_init(void)
914 {
915 int cpu;
916
917 for_each_possible_cpu(cpu) {
918 per_cpu(tasklet_vec, cpu).tail =
919 &per_cpu(tasklet_vec, cpu).head;
920 per_cpu(tasklet_hi_vec, cpu).tail =
921 &per_cpu(tasklet_hi_vec, cpu).head;
922 }
923
924 open_softirq(TASKLET_SOFTIRQ, tasklet_action);
925 open_softirq(HI_SOFTIRQ, tasklet_hi_action);
926 }
927
/*
 * smpboot ->thread_should_run callback: non-zero when softirqs are pending
 * on the local CPU. @cpu is not used.
 */
static int ksoftirqd_should_run(unsigned int cpu)
{
	return local_softirq_pending();
}
932
/*
 * smpboot ->thread_fn callback of ksoftirqd: process pending softirqs, if
 * any, and offer to reschedule afterwards. @cpu is not used.
 */
static void run_ksoftirqd(unsigned int cpu)
{
	ksoftirqd_run_begin();

	if (!local_softirq_pending()) {
		ksoftirqd_run_end();
		return;
	}

	/*
	 * We can safely run softirq on inline stack, as we are not deep
	 * in the task stack here.
	 */
	__do_softirq();
	ksoftirqd_run_end();
	cond_resched();
}
948
949 #ifdef CONFIG_HOTPLUG_CPU
/*
 * takeover_tasklets - adopt the tasklets queued on a dead CPU
 * @cpu: the CPU whose queues are emptied
 *
 * Registered for CPUHP_SOFTIRQ_DEAD in spawn_ksoftirqd(). Both tasklet
 * queues of @cpu are spliced onto the tail of the current CPU's queues and
 * the matching softirqs are raised (unconditionally, even if a queue was
 * empty). Always returns 0.
 */
static int takeover_tasklets(unsigned int cpu)
{
	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
		*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
		__this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
		/* Leave the dead CPU's queue in the empty state. */
		per_cpu(tasklet_vec, cpu).head = NULL;
		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
	}
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

	/* Same procedure for the high priority queue. */
	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
		*__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
		__this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
		per_cpu(tasklet_hi_vec, cpu).head = NULL;
		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
	}
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
	return 0;
}
975 #else
976 #define takeover_tasklets NULL
977 #endif /* CONFIG_HOTPLUG_CPU */
978
/*
 * Descriptor of the per-CPU ksoftirqd threads, registered in
 * spawn_ksoftirqd(). The thread pointers are stored in the per-CPU
 * ksoftirqd variable.
 */
static struct smp_hotplug_thread softirq_threads = {
	.store			= &ksoftirqd,
	.thread_should_run	= ksoftirqd_should_run,
	.thread_fn		= run_ksoftirqd,
	.thread_comm		= "ksoftirqd/%u",
};
985
/*
 * Early initcall: install takeover_tasklets() as the CPUHP_SOFTIRQ_DEAD
 * teardown callback (NULL when CPU hotplug is not configured) and register
 * the per-CPU ksoftirqd threads. A failing thread registration is fatal
 * (BUG_ON). Always returns 0.
 */
static __init int spawn_ksoftirqd(void)
{
	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
				  takeover_tasklets);
	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));

	return 0;
}
early_initcall(spawn_ksoftirqd);
995
996 /*
997 * [ These __weak aliases are kept in a separate compilation unit, so that
998 * GCC does not inline them incorrectly. ]
999 */
1000
/* Weak default that does nothing and returns 0; a strong definition elsewhere overrides it. */
int __init __weak early_irq_init(void)
{
	return 0;
}
1005
/* Weak default returning NR_IRQS_LEGACY; a strong definition elsewhere overrides it. */
int __init __weak arch_probe_nr_irqs(void)
{
	return NR_IRQS_LEGACY;
}
1010
/* Weak default that does nothing and returns 0; a strong definition elsewhere overrides it. */
int __init __weak arch_early_irq_init(void)
{
	return 0;
}
1015
/* Weak default returning @from unchanged; a strong definition elsewhere overrides it. */
unsigned int __weak arch_dynirq_lower_bound(unsigned int from)
{
	return from;
}
1020