1 /*
2  * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition
3  * Internal non-public definitions that provide either classic
4  * or preemptible semantics.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19  *
20  * Copyright (c) 2010 Linaro
21  *
22  * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
23  */
24 
25 #include <linux/kthread.h>
26 #include <linux/module.h>
27 #include <linux/debugfs.h>
28 #include <linux/seq_file.h>
29 
30 /* Global control variables for rcupdate callback mechanism. */
31 struct rcu_ctrlblk {
32 	struct rcu_head *rcucblist;	/* List of pending callbacks (CBs). */
33 	struct rcu_head **donetail;	/* ->next pointer of last "done" CB. */
34 	struct rcu_head **curtail;	/* ->next pointer of last CB. */
35 	RCU_TRACE(long qlen);		/* Number of pending CBs. */
36 	RCU_TRACE(unsigned long gp_start); /* Start time for stalls. */
37 	RCU_TRACE(unsigned long ticks_this_gp); /* Statistic for stalls. */
38 	RCU_TRACE(unsigned long jiffies_stall); /* Jiffies at next stall. */
39 	RCU_TRACE(char *name);		/* Name of RCU type. */
40 };
41 
42 /* Definition for rcupdate control block. */
43 static struct rcu_ctrlblk rcu_sched_ctrlblk = {
44 	.donetail	= &rcu_sched_ctrlblk.rcucblist,
45 	.curtail	= &rcu_sched_ctrlblk.rcucblist,
46 	RCU_TRACE(.name = "rcu_sched")
47 };
48 
49 static struct rcu_ctrlblk rcu_bh_ctrlblk = {
50 	.donetail	= &rcu_bh_ctrlblk.rcucblist,
51 	.curtail	= &rcu_bh_ctrlblk.rcucblist,
52 	RCU_TRACE(.name = "rcu_bh")
53 };
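/*
 * Illustrative sketch, added for clarity and not part of the original
 * file: the two tail pointers partition the single ->rcucblist.
 * Callbacks between ->rcucblist and *->donetail have had a grace period
 * elapse and are ready to invoke; callbacks from there up to *->curtail
 * are still waiting.  A minimal enqueue under this scheme (hypothetical
 * helper name, caller assumed to have irqs disabled) would be:
 *
 *	static void example_enqueue(struct rcu_ctrlblk *rcp,
 *				    struct rcu_head *head)
 *	{
 *		head->next = NULL;
 *		*rcp->curtail = head;
 *		rcp->curtail = &head->next;
 *	}
 */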
54 
55 #ifdef CONFIG_DEBUG_LOCK_ALLOC
56 int rcu_scheduler_active __read_mostly;
57 EXPORT_SYMBOL_GPL(rcu_scheduler_active);
58 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
59 
60 #ifdef CONFIG_RCU_TRACE
61 
62 static void check_cpu_stall(struct rcu_ctrlblk *rcp)
63 {
64 	unsigned long j;
65 	unsigned long js;
66 
67 	if (rcu_cpu_stall_suppress)
68 		return;
69 	rcp->ticks_this_gp++;
70 	j = jiffies;
71 	js = rcp->jiffies_stall;
72 	if (*rcp->curtail && ULONG_CMP_GE(j, js)) {
73 		pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
74 		       rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting,
75 		       jiffies - rcp->gp_start, rcp->qlen);
76 		dump_stack();
77 	}
78 	if (*rcp->curtail && ULONG_CMP_GE(j, js))
79 		rcp->jiffies_stall = jiffies +
80 			3 * rcu_jiffies_till_stall_check() + 3;
81 	else if (ULONG_CMP_GE(j, js))
82 		rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
83 }
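/*
 * Note added for clarity (not in the original file): ULONG_CMP_GE() is
 * the wraparound-safe time comparison from rcupdate.h, essentially
 * "ULONG_MAX / 2 >= (a) - (b)".  For example, if the jiffies counter has
 * wrapped so that j == 10 and js == ULONG_MAX - 5, then j - js == 16,
 * so ULONG_CMP_GE(j, js) is true and j is still treated as being at or
 * past the stall deadline js.
 */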
84 
85 static void check_cpu_stall_preempt(void);
86 
87 #endif /* #ifdef CONFIG_RCU_TRACE */
88 
89 static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
90 {
91 #ifdef CONFIG_RCU_TRACE
92 	rcp->ticks_this_gp = 0;
93 	rcp->gp_start = jiffies;
94 	rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
95 #endif /* #ifdef CONFIG_RCU_TRACE */
96 }
97 
98 static void check_cpu_stalls(void)
99 {
100 	RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk));
101 	RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk));
102 	RCU_TRACE(check_cpu_stall_preempt());
103 }
104 
105 #ifdef CONFIG_TINY_PREEMPT_RCU
106 
107 #include <linux/delay.h>
108 
109 /* Global control variables for preemptible RCU. */
110 struct rcu_preempt_ctrlblk {
111 	struct rcu_ctrlblk rcb;	/* curtail: ->next ptr of last CB for GP. */
112 	struct rcu_head **nexttail;
113 				/* Tasks blocked in a preemptible RCU */
114 				/*  read-side critical section while a */
115 				/*  preemptible-RCU grace period is in */
116 				/*  progress must wait for a later grace */
117 				/*  period.  This pointer points to the */
118 				/*  ->next pointer of the last task that */
119 				/*  must wait for a later grace period, or */
120 				/*  to &->rcb.rcucblist if there is no */
121 				/*  such task. */
122 	struct list_head blkd_tasks;
123 				/* Tasks blocked in RCU read-side critical */
124 				/*  section.  Tasks are placed at the head */
125 				/*  of this list and age towards the tail. */
126 	struct list_head *gp_tasks;
127 				/* Pointer to the first task blocking the */
128 				/*  current grace period, or NULL if there */
129 				/*  is no such task. */
130 	struct list_head *exp_tasks;
131 				/* Pointer to first task blocking the */
132 				/*  current expedited grace period, or NULL */
133 				/*  if there is no such task.  If there */
134 				/*  is no current expedited grace period, */
135 				/*  then there cannot be any such task. */
136 #ifdef CONFIG_RCU_BOOST
137 	struct list_head *boost_tasks;
138 				/* Pointer to first task that needs to be */
139 				/*  priority-boosted, or NULL if no priority */
140 				/*  boosting is needed.  If there is no */
141 				/*  current or expedited grace period, there */
142 				/*  can be no such task. */
143 #endif /* #ifdef CONFIG_RCU_BOOST */
144 	u8 gpnum;		/* Current grace period. */
145 	u8 gpcpu;		/* Last grace period blocked by the CPU. */
146 	u8 completed;		/* Last grace period completed. */
147 				/*  If all three are equal, RCU is idle. */
148 #ifdef CONFIG_RCU_BOOST
149 	unsigned long boost_time; /* When to start boosting (jiffies). */
150 #endif /* #ifdef CONFIG_RCU_BOOST */
151 #ifdef CONFIG_RCU_TRACE
152 	unsigned long n_grace_periods;
153 #ifdef CONFIG_RCU_BOOST
154 	unsigned long n_tasks_boosted;
155 				/* Total number of tasks boosted. */
156 	unsigned long n_exp_boosts;
157 				/* Number of tasks boosted for expedited GP. */
158 	unsigned long n_normal_boosts;
159 				/* Number of tasks boosted for normal GP. */
160 	unsigned long n_balk_blkd_tasks;
161 				/* Refused to boost: no blocked tasks. */
162 	unsigned long n_balk_exp_gp_tasks;
163 				/* Refused to boost: nothing blocking GP. */
164 	unsigned long n_balk_boost_tasks;
165 				/* Refused to boost: already boosting. */
166 	unsigned long n_balk_notyet;
167 				/* Refused to boost: not yet time. */
168 	unsigned long n_balk_nos;
169 				/* Refused to boost: not sure why, though. */
170 				/*  This can happen due to race conditions. */
171 #endif /* #ifdef CONFIG_RCU_BOOST */
172 #endif /* #ifdef CONFIG_RCU_TRACE */
173 };
174 
175 static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
176 	.rcb.donetail = &rcu_preempt_ctrlblk.rcb.rcucblist,
177 	.rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist,
178 	.nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist,
179 	.blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks),
180 	RCU_TRACE(.rcb.name = "rcu_preempt")
181 };
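/*
 * Illustrative layout, added for clarity and not part of the original
 * file: the three tail pointers carve the single ->rcb.rcucblist into
 * segments, each tail pointer referencing the ->next field of the last
 * callback in its segment (or &->rcb.rcucblist if that segment is empty):
 *
 *	rcb.rcucblist -> [done CBs] -> [wait for current GP] -> [wait for next GP]
 *	                           ^                         ^                    ^
 *	                     rcb.donetail              rcb.curtail           nexttail
 *
 * rcu_preempt_cpu_qs() below advances ->donetail and ->curtail when the
 * current grace period ends, and call_rcu() appends new callbacks at
 * *->nexttail.
 */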
182 
183 static int rcu_preempted_readers_exp(void);
184 static void rcu_report_exp_done(void);
185 
186 /*
187  * Return true if the CPU has not yet responded to the current grace period.
188  */
189 static int rcu_cpu_blocking_cur_gp(void)
190 {
191 	return rcu_preempt_ctrlblk.gpcpu != rcu_preempt_ctrlblk.gpnum;
192 }
193 
194 /*
195  * Check for a running RCU reader.  Because there is only one CPU,
196  * there can be but one running RCU reader at a time.  ;-)
197  *
198  * Returns zero if there are no running readers.  Returns a positive
199  * number if there is at least one reader within its RCU read-side
200  * critical section.  Returns a negative number if an outermost reader
201  * is in the midst of exiting from its RCU read-side critical section.
207  */
208 static int rcu_preempt_running_reader(void)
209 {
210 	return current->rcu_read_lock_nesting;
211 }
212 
213 /*
214  * Check for preempted RCU readers blocking any grace period.
215  * If the caller needs a reliable answer, it must disable hard irqs.
216  */
217 static int rcu_preempt_blocked_readers_any(void)
218 {
219 	return !list_empty(&rcu_preempt_ctrlblk.blkd_tasks);
220 }
221 
222 /*
223  * Check for preempted RCU readers blocking the current grace period.
224  * If the caller needs a reliable answer, it must disable hard irqs.
225  */
226 static int rcu_preempt_blocked_readers_cgp(void)
227 {
228 	return rcu_preempt_ctrlblk.gp_tasks != NULL;
229 }
230 
231 /*
232  * Return true if another preemptible-RCU grace period is needed.
233  */
234 static int rcu_preempt_needs_another_gp(void)
235 {
236 	return *rcu_preempt_ctrlblk.rcb.curtail != NULL;
237 }
238 
239 /*
240  * Return true if a preemptible-RCU grace period is in progress.
241  * The caller must disable hardirqs.
242  */
243 static int rcu_preempt_gp_in_progress(void)
244 {
245 	return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum;
246 }
247 
248 /*
249  * Advance a ->blkd_tasks-list pointer to the next entry, returning
250  * NULL instead if at the end of the list.
251  */
252 static struct list_head *rcu_next_node_entry(struct task_struct *t)
253 {
254 	struct list_head *np;
255 
256 	np = t->rcu_node_entry.next;
257 	if (np == &rcu_preempt_ctrlblk.blkd_tasks)
258 		np = NULL;
259 	return np;
260 }
261 
262 #ifdef CONFIG_RCU_TRACE
263 
264 #ifdef CONFIG_RCU_BOOST
265 static void rcu_initiate_boost_trace(void);
266 #endif /* #ifdef CONFIG_RCU_BOOST */
267 
268 /*
269  * Dump additional statistics for TINY_PREEMPT_RCU.
270  */
271 static void show_tiny_preempt_stats(struct seq_file *m)
272 {
273 	seq_printf(m, "rcu_preempt: qlen=%ld gp=%lu g%u/p%u/c%u tasks=%c%c%c\n",
274 		   rcu_preempt_ctrlblk.rcb.qlen,
275 		   rcu_preempt_ctrlblk.n_grace_periods,
276 		   rcu_preempt_ctrlblk.gpnum,
277 		   rcu_preempt_ctrlblk.gpcpu,
278 		   rcu_preempt_ctrlblk.completed,
279 		   "T."[list_empty(&rcu_preempt_ctrlblk.blkd_tasks)],
280 		   "N."[!rcu_preempt_ctrlblk.gp_tasks],
281 		   "E."[!rcu_preempt_ctrlblk.exp_tasks]);
282 #ifdef CONFIG_RCU_BOOST
283 	seq_printf(m, "%sttb=%c ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n",
284 		   "             ",
285 		   "B."[!rcu_preempt_ctrlblk.boost_tasks],
286 		   rcu_preempt_ctrlblk.n_tasks_boosted,
287 		   rcu_preempt_ctrlblk.n_exp_boosts,
288 		   rcu_preempt_ctrlblk.n_normal_boosts,
289 		   (int)(jiffies & 0xffff),
290 		   (int)(rcu_preempt_ctrlblk.boost_time & 0xffff));
291 	seq_printf(m, "%s: nt=%lu egt=%lu bt=%lu ny=%lu nos=%lu\n",
292 		   "             balk",
293 		   rcu_preempt_ctrlblk.n_balk_blkd_tasks,
294 		   rcu_preempt_ctrlblk.n_balk_exp_gp_tasks,
295 		   rcu_preempt_ctrlblk.n_balk_boost_tasks,
296 		   rcu_preempt_ctrlblk.n_balk_notyet,
297 		   rcu_preempt_ctrlblk.n_balk_nos);
298 #endif /* #ifdef CONFIG_RCU_BOOST */
299 }
300 
301 #endif /* #ifdef CONFIG_RCU_TRACE */
302 
303 #ifdef CONFIG_RCU_BOOST
304 
305 #include "rtmutex_common.h"
306 
307 #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
308 
309 /* Controls for rcu_kthread() kthread. */
310 static struct task_struct *rcu_kthread_task;
311 static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq);
312 static unsigned long have_rcu_kthread_work;
313 
314 /*
315  * Carry out RCU priority boosting on the task indicated by ->boost_tasks,
316  * and advance ->boost_tasks to the next task in the ->blkd_tasks list.
317  */
318 static int rcu_boost(void)
319 {
320 	unsigned long flags;
321 	struct rt_mutex mtx;
322 	struct task_struct *t;
323 	struct list_head *tb;
324 
325 	if (rcu_preempt_ctrlblk.boost_tasks == NULL &&
326 	    rcu_preempt_ctrlblk.exp_tasks == NULL)
327 		return 0;  /* Nothing to boost. */
328 
329 	local_irq_save(flags);
330 
331 	/*
332 	 * Recheck with irqs disabled: all tasks in need of boosting
333 	 * might exit their RCU read-side critical sections on their own
334 	 * if we are preempted just before disabling irqs.
335 	 */
336 	if (rcu_preempt_ctrlblk.boost_tasks == NULL &&
337 	    rcu_preempt_ctrlblk.exp_tasks == NULL) {
338 		local_irq_restore(flags);
339 		return 0;
340 	}
341 
342 	/*
343 	 * Preferentially boost tasks blocking expedited grace periods.
344 	 * This cannot starve the normal grace periods because a second
345 	 * expedited grace period must boost all blocked tasks, including
346 	 * those blocking the pre-existing normal grace period.
347 	 */
348 	if (rcu_preempt_ctrlblk.exp_tasks != NULL) {
349 		tb = rcu_preempt_ctrlblk.exp_tasks;
350 		RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++);
351 	} else {
352 		tb = rcu_preempt_ctrlblk.boost_tasks;
353 		RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++);
354 	}
355 	RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++);
356 
357 	/*
358 	 * We boost task t by manufacturing an rt_mutex that appears to
359 	 * be held by task t.  We leave a pointer to that rt_mutex where
360 	 * task t can find it, and task t will release the mutex when it
361 	 * exits its outermost RCU read-side critical section.  Then
362 	 * simply acquiring this artificial rt_mutex will boost task
363 	 * t's priority.  (Thanks to tglx for suggesting this approach!)
364 	 */
365 	t = container_of(tb, struct task_struct, rcu_node_entry);
366 	rt_mutex_init_proxy_locked(&mtx, t);
367 	t->rcu_boost_mutex = &mtx;
368 	local_irq_restore(flags);
369 	rt_mutex_lock(&mtx);
370 	rt_mutex_unlock(&mtx);  /* Keep lockdep happy. */
371 
372 	return ACCESS_ONCE(rcu_preempt_ctrlblk.boost_tasks) != NULL ||
373 	       ACCESS_ONCE(rcu_preempt_ctrlblk.exp_tasks) != NULL;
374 }
375 
376 /*
377  * Check to see if it is now time to start boosting RCU readers blocking
378  * the current grace period, and, if so, tell the rcu_kthread_task to
379  * start boosting them.  If there is an expedited boost in progress,
380  * we wait for it to complete.
381  *
382  * If there are no blocked readers blocking the current grace period,
383  * return 0 to let the caller know, otherwise return 1.  Note that this
384  * return value is independent of whether or not boosting was done.
385  */
386 static int rcu_initiate_boost(void)
387 {
388 	if (!rcu_preempt_blocked_readers_cgp() &&
389 	    rcu_preempt_ctrlblk.exp_tasks == NULL) {
390 		RCU_TRACE(rcu_preempt_ctrlblk.n_balk_exp_gp_tasks++);
391 		return 0;
392 	}
393 	if (rcu_preempt_ctrlblk.exp_tasks != NULL ||
394 	    (rcu_preempt_ctrlblk.gp_tasks != NULL &&
395 	     rcu_preempt_ctrlblk.boost_tasks == NULL &&
396 	     ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))) {
397 		if (rcu_preempt_ctrlblk.exp_tasks == NULL)
398 			rcu_preempt_ctrlblk.boost_tasks =
399 				rcu_preempt_ctrlblk.gp_tasks;
400 		invoke_rcu_callbacks();
401 	} else {
402 		RCU_TRACE(rcu_initiate_boost_trace());
403 	}
404 	return 1;
405 }
406 
407 #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
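/*
 * Worked example, added for clarity: with HZ == 250 and
 * CONFIG_RCU_BOOST_DELAY == 500 (milliseconds), this works out to
 * DIV_ROUND_UP(500 * 250, 1000) == 125 jiffies, so boosting is not
 * considered until roughly half a second into the grace period.
 */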
408 
409 /*
410  * Do priority-boost accounting for the start of a new grace period.
411  */
412 static void rcu_preempt_boost_start_gp(void)
413 {
414 	rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
415 }
416 
417 #else /* #ifdef CONFIG_RCU_BOOST */
418 
419 /*
420  * If there is no RCU priority boosting, we don't initiate boosting,
421  * but we do indicate whether there are blocked readers blocking the
422  * current grace period.
423  */
424 static int rcu_initiate_boost(void)
425 {
426 	return rcu_preempt_blocked_readers_cgp();
427 }
428 
429 /*
430  * If there is no RCU priority boosting, nothing to do at grace-period start.
431  */
432 static void rcu_preempt_boost_start_gp(void)
433 {
434 }
435 
436 #endif /* else #ifdef CONFIG_RCU_BOOST */
437 
438 /*
439  * Record a preemptible-RCU quiescent state for the specified CPU.  Note
440  * that this just means that the task currently running on the CPU is
441  * in a quiescent state.  There might be any number of tasks blocked
442  * while in an RCU read-side critical section.
443  *
444  * Unlike the other rcu_*_qs() functions, callers to this function
445  * must disable irqs in order to protect the assignment to
446  * ->rcu_read_unlock_special.
447  *
448  * Because this is a single-CPU implementation, the only way a grace
449  * period can end is if the CPU is in a quiescent state.  The reason is
450  * that a blocked preemptible-RCU reader can exit its critical section
451  * only if the CPU is running it at the time.  Therefore, when the
452  * last task blocking the current grace period exits its RCU read-side
453  * critical section, neither the CPU nor blocked tasks will be stopping
454  * the current grace period.  (In contrast, SMP implementations
455  * might have CPUs running in RCU read-side critical sections that
456  * block later grace periods -- but this is not possible given only
457  * one CPU.)
458  */
459 static void rcu_preempt_cpu_qs(void)
460 {
461 	/* Record both CPU and task as having responded to current GP. */
462 	rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum;
463 	current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
464 
465 	/* If there is no GP then there is nothing more to do.  */
466 	if (!rcu_preempt_gp_in_progress())
467 		return;
468 	/*
469 	 * Check up on boosting.  If there are readers blocking the
470 	 * current grace period, leave.
471 	 */
472 	if (rcu_initiate_boost())
473 		return;
474 
475 	/* Advance callbacks. */
476 	rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum;
477 	rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.rcb.curtail;
478 	rcu_preempt_ctrlblk.rcb.curtail = rcu_preempt_ctrlblk.nexttail;
479 
480 	/* If there are no blocked readers, next GP is done instantly. */
481 	if (!rcu_preempt_blocked_readers_any())
482 		rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail;
483 
484 	/* If there are done callbacks, cause them to be invoked. */
485 	if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
486 		invoke_rcu_callbacks();
487 }
488 
489 /*
490  * Start a new RCU grace period if warranted.  Hard irqs must be disabled.
491  */
492 static void rcu_preempt_start_gp(void)
493 {
494 	if (!rcu_preempt_gp_in_progress() && rcu_preempt_needs_another_gp()) {
495 
496 		/* Official start of GP. */
497 		rcu_preempt_ctrlblk.gpnum++;
498 		RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++);
499 		reset_cpu_stall_ticks(&rcu_preempt_ctrlblk.rcb);
500 
501 		/* Any blocked RCU readers block new GP. */
502 		if (rcu_preempt_blocked_readers_any())
503 			rcu_preempt_ctrlblk.gp_tasks =
504 				rcu_preempt_ctrlblk.blkd_tasks.next;
505 
506 		/* Set up for RCU priority boosting. */
507 		rcu_preempt_boost_start_gp();
508 
509 		/* If there is no running reader, CPU is done with GP. */
510 		if (!rcu_preempt_running_reader())
511 			rcu_preempt_cpu_qs();
512 	}
513 }
514 
515 /*
516  * We have entered the scheduler, and the current task might soon be
517  * context-switched away from.  If this task is in an RCU read-side
518  * critical section, we will no longer be able to rely on the CPU to
519  * record that fact, so we enqueue the task on the blkd_tasks list.
520  * If the task started after the current grace period began, as recorded
521  * by ->gpcpu, we enqueue at the beginning of the list.  Otherwise we
522  * enqueue it before the element referenced by ->gp_tasks (or at the tail if
523  * ->gp_tasks is NULL) and point ->gp_tasks at the newly added element.
524  * The task will dequeue itself when it exits the outermost enclosing
525  * RCU read-side critical section.  Therefore, the current grace period
526  * cannot be permitted to complete until the ->gp_tasks pointer becomes
527  * NULL.
528  *
529  * Caller must disable preemption.
530  */
531 void rcu_preempt_note_context_switch(void)
532 {
533 	struct task_struct *t = current;
534 	unsigned long flags;
535 
536 	local_irq_save(flags); /* must exclude scheduler_tick(). */
537 	if (rcu_preempt_running_reader() > 0 &&
538 	    (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
539 
540 		/* Possibly blocking in an RCU read-side critical section. */
541 		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
542 
543 		/*
544 		 * If this CPU has already checked in, then this task
545 		 * will hold up the next grace period rather than the
546 		 * current grace period.  Queue the task accordingly.
547 		 * If the task is queued for the current grace period
548 		 * (i.e., this CPU has not yet passed through a quiescent
549 		 * state for the current grace period), then as long
550 		 * as that task remains queued, the current grace period
551 		 * cannot end.
552 		 */
553 		list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks);
554 		if (rcu_cpu_blocking_cur_gp())
555 			rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry;
556 	} else if (rcu_preempt_running_reader() < 0 &&
557 		   t->rcu_read_unlock_special) {
558 		/*
559 		 * Complete exit from RCU read-side critical section on
560 		 * behalf of preempted instance of __rcu_read_unlock().
561 		 */
562 		rcu_read_unlock_special(t);
563 	}
564 
565 	/*
566 	 * Either we were not in an RCU read-side critical section to
567 	 * begin with, or we have now recorded that critical section
568 	 * globally.  Either way, we can now note a quiescent state
569 	 * for this CPU.  Again, if we were in an RCU read-side critical
570 	 * section, and if that critical section was blocking the current
571 	 * grace period, then the fact that the task has been enqueued
572 	 * means that current grace period continues to be blocked.
573 	 */
574 	rcu_preempt_cpu_qs();
575 	local_irq_restore(flags);
576 }
577 
578 /*
579  * Handle special cases during rcu_read_unlock(), such as needing to
580  * notify RCU core processing or task having blocked during the RCU
581  * read-side critical section.
582  */
583 void rcu_read_unlock_special(struct task_struct *t)
584 {
585 	int empty;
586 	int empty_exp;
587 	unsigned long flags;
588 	struct list_head *np;
589 #ifdef CONFIG_RCU_BOOST
590 	struct rt_mutex *rbmp = NULL;
591 #endif /* #ifdef CONFIG_RCU_BOOST */
592 	int special;
593 
594 	/*
595 	 * NMI handlers cannot block and cannot safely manipulate state.
596 	 * They therefore cannot possibly be special, so just leave.
597 	 */
598 	if (in_nmi())
599 		return;
600 
601 	local_irq_save(flags);
602 
603 	/*
604 	 * If RCU core is waiting for this CPU to exit critical section,
605 	 * let it know that we have done so.
606 	 */
607 	special = t->rcu_read_unlock_special;
608 	if (special & RCU_READ_UNLOCK_NEED_QS)
609 		rcu_preempt_cpu_qs();
610 
611 	/* Hardware IRQ handlers cannot block. */
612 	if (in_irq() || in_serving_softirq()) {
613 		local_irq_restore(flags);
614 		return;
615 	}
616 
617 	/* Clean up if blocked during RCU read-side critical section. */
618 	if (special & RCU_READ_UNLOCK_BLOCKED) {
619 		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
620 
621 		/*
622 		 * Remove this task from the ->blkd_tasks list and adjust
623 		 * any pointers that might have been referencing it.
624 		 */
625 		empty = !rcu_preempt_blocked_readers_cgp();
626 		empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL;
627 		np = rcu_next_node_entry(t);
628 		list_del_init(&t->rcu_node_entry);
629 		if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks)
630 			rcu_preempt_ctrlblk.gp_tasks = np;
631 		if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks)
632 			rcu_preempt_ctrlblk.exp_tasks = np;
633 #ifdef CONFIG_RCU_BOOST
634 		if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks)
635 			rcu_preempt_ctrlblk.boost_tasks = np;
636 #endif /* #ifdef CONFIG_RCU_BOOST */
637 
638 		/*
639 		 * If this was the last task on the current list, and if
640 		 * we aren't waiting on the CPU, report the quiescent state
641 		 * and start a new grace period if needed.
642 		 */
643 		if (!empty && !rcu_preempt_blocked_readers_cgp()) {
644 			rcu_preempt_cpu_qs();
645 			rcu_preempt_start_gp();
646 		}
647 
648 		/*
649 		 * If this was the last task on the expedited lists,
650 		 * then we need to wake up the waiting task.
651 		 */
652 		if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL)
653 			rcu_report_exp_done();
654 	}
655 #ifdef CONFIG_RCU_BOOST
656 	/* Unboost self if was boosted. */
657 	if (t->rcu_boost_mutex != NULL) {
658 		rbmp = t->rcu_boost_mutex;
659 		t->rcu_boost_mutex = NULL;
660 		rt_mutex_unlock(rbmp);
661 	}
662 #endif /* #ifdef CONFIG_RCU_BOOST */
663 	local_irq_restore(flags);
664 }
665 
666 /*
667  * Check for a quiescent state from the current CPU.  When a task blocks,
668  * the task is recorded in the rcu_preempt_ctrlblk structure, which is
669  * checked elsewhere.  This is called from the scheduling-clock interrupt.
670  *
671  * Caller must disable hard irqs.
672  */
673 static void rcu_preempt_check_callbacks(void)
674 {
675 	struct task_struct *t = current;
676 
677 	if (rcu_preempt_gp_in_progress() &&
678 	    (!rcu_preempt_running_reader() ||
679 	     !rcu_cpu_blocking_cur_gp()))
680 		rcu_preempt_cpu_qs();
681 	if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
682 	    rcu_preempt_ctrlblk.rcb.donetail)
683 		invoke_rcu_callbacks();
684 	if (rcu_preempt_gp_in_progress() &&
685 	    rcu_cpu_blocking_cur_gp() &&
686 	    rcu_preempt_running_reader() > 0)
687 		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
688 }
689 
690 /*
691  * TINY_PREEMPT_RCU has an extra callback-list tail pointer to
692  * update, so this is invoked from rcu_process_callbacks() to
693  * handle that case.  Of course, it is invoked for all flavors of
694  * RCU, but RCU callbacks can appear only on one of the lists, and
695  * neither ->nexttail nor ->donetail can possibly be NULL, so there
696  * is no need for an explicit check.
697  */
698 static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
699 {
700 	if (rcu_preempt_ctrlblk.nexttail == rcp->donetail)
701 		rcu_preempt_ctrlblk.nexttail = &rcp->rcucblist;
702 }
703 
704 /*
705  * Process callbacks for preemptible RCU.
706  */
707 static void rcu_preempt_process_callbacks(void)
708 {
709 	__rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
710 }
711 
712 /*
713  * Queue a preemptible-RCU callback for invocation after a grace period.
714  */
715 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
716 {
717 	unsigned long flags;
718 
719 	debug_rcu_head_queue(head);
720 	head->func = func;
721 	head->next = NULL;
722 
723 	local_irq_save(flags);
724 	*rcu_preempt_ctrlblk.nexttail = head;
725 	rcu_preempt_ctrlblk.nexttail = &head->next;
726 	RCU_TRACE(rcu_preempt_ctrlblk.rcb.qlen++);
727 	rcu_preempt_start_gp();  /* checks to see if GP needed. */
728 	local_irq_restore(flags);
729 }
730 EXPORT_SYMBOL_GPL(call_rcu);
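/*
 * Usage sketch, added for illustration; struct foo, free_foo_rcu() and
 * fp are hypothetical names, not part of this file.  Callers typically
 * embed a struct rcu_head in their own structure and free it from the
 * callback once a grace period has elapsed:
 *
 *	struct foo {
 *		struct rcu_head rcu;
 *		int data;
 *	};
 *
 *	static void free_foo_rcu(struct rcu_head *rcu)
 *	{
 *		kfree(container_of(rcu, struct foo, rcu));
 *	}
 *
 * After unlinking fp from all reader-visible structures, the updater
 * invokes call_rcu(&fp->rcu, free_foo_rcu) and returns immediately.
 */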
731 
732 /*
733  * synchronize_rcu - wait until a grace period has elapsed.
734  *
735  * Control will return to the caller some time after a full grace
736  * period has elapsed, in other words after all currently executing RCU
737  * read-side critical sections have completed.  RCU read-side critical
738  * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
739  * and may be nested.
740  */
741 void synchronize_rcu(void)
742 {
743 	rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
744 			   !lock_is_held(&rcu_lock_map) &&
745 			   !lock_is_held(&rcu_sched_lock_map),
746 			   "Illegal synchronize_rcu() in RCU read-side critical section");
747 
748 #ifdef CONFIG_DEBUG_LOCK_ALLOC
749 	if (!rcu_scheduler_active)
750 		return;
751 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
752 
753 	WARN_ON_ONCE(rcu_preempt_running_reader());
754 	if (!rcu_preempt_blocked_readers_any())
755 		return;
756 
757 	/* Once we get past the fastpath checks, same code as rcu_barrier(). */
758 	if (rcu_expedited)
759 		synchronize_rcu_expedited();
760 	else
761 		rcu_barrier();
762 }
763 EXPORT_SYMBOL_GPL(synchronize_rcu);
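/*
 * Usage sketch, added for illustration; gbl_foo, newp and oldp are
 * hypothetical names, not part of this file.  A typical updater
 * publishes a new version, waits for all pre-existing readers, then
 * frees the old version, while readers bracket their accesses with
 * rcu_read_lock()/rcu_read_unlock() and use rcu_dereference():
 *
 *	oldp = gbl_foo;
 *	rcu_assign_pointer(gbl_foo, newp);
 *	synchronize_rcu();
 *	kfree(oldp);
 */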
764 
765 static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
766 static unsigned long sync_rcu_preempt_exp_count;
767 static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
768 
769 /*
770  * Return non-zero if there are any tasks in RCU read-side critical
771  * sections blocking the current preemptible-RCU expedited grace period.
772  * If there is no preemptible-RCU expedited grace period currently in
773  * progress, returns zero unconditionally.
774  */
775 static int rcu_preempted_readers_exp(void)
776 {
777 	return rcu_preempt_ctrlblk.exp_tasks != NULL;
778 }
779 
780 /*
781  * Report the exit from RCU read-side critical section for the last task
782  * that queued itself during or before the current expedited preemptible-RCU
783  * grace period.
784  */
785 static void rcu_report_exp_done(void)
786 {
787 	wake_up(&sync_rcu_preempt_exp_wq);
788 }
789 
790 /*
791  * Wait for an rcu-preempt grace period, but expedite it.  The basic idea
792  * is to rely on the fact that there is but one CPU, and that it is
793  * illegal for a task to invoke synchronize_rcu_expedited() while in a
794  * preemptible-RCU read-side critical section.  Therefore, any such
795  * critical sections must correspond to blocked tasks, which must therefore
796  * be on the ->blkd_tasks list.  So just record the current head of the
797  * list in the ->exp_tasks pointer, and wait for all tasks including and
798  * after the task pointed to by ->exp_tasks to drain.
799  */
800 void synchronize_rcu_expedited(void)
801 {
802 	unsigned long flags;
803 	struct rcu_preempt_ctrlblk *rpcp = &rcu_preempt_ctrlblk;
804 	unsigned long snap;
805 
806 	barrier(); /* ensure prior action seen before grace period. */
807 
808 	WARN_ON_ONCE(rcu_preempt_running_reader());
809 
810 	/*
811 	 * Acquire lock so that there is only one preemptible RCU grace
812 	 * period in flight.  Of course, if someone does the expedited
813 	 * grace period for us while we are acquiring the lock, just leave.
814 	 */
815 	snap = sync_rcu_preempt_exp_count + 1;
816 	mutex_lock(&sync_rcu_preempt_exp_mutex);
817 	if (ULONG_CMP_LT(snap, sync_rcu_preempt_exp_count))
818 		goto unlock_mb_ret; /* Others did our work for us. */
819 
820 	local_irq_save(flags);
821 
822 	/*
823 	 * All RCU readers have to already be on blkd_tasks because
824 	 * we cannot legally be executing in an RCU read-side critical
825 	 * section.
826 	 */
827 
828 	/* Snapshot current head of ->blkd_tasks list. */
829 	rpcp->exp_tasks = rpcp->blkd_tasks.next;
830 	if (rpcp->exp_tasks == &rpcp->blkd_tasks)
831 		rpcp->exp_tasks = NULL;
832 
833 	/* Wait for tail of ->blkd_tasks list to drain. */
834 	if (!rcu_preempted_readers_exp()) {
835 		local_irq_restore(flags);
836 	} else {
837 		rcu_initiate_boost();
838 		local_irq_restore(flags);
839 		wait_event(sync_rcu_preempt_exp_wq,
840 			   !rcu_preempted_readers_exp());
841 	}
842 
843 	/* Clean up and exit. */
844 	barrier(); /* ensure expedited GP seen before counter increment. */
845 	sync_rcu_preempt_exp_count++;
846 unlock_mb_ret:
847 	mutex_unlock(&sync_rcu_preempt_exp_mutex);
848 	barrier(); /* ensure subsequent action seen after grace period. */
849 }
850 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
851 
852 /*
853  * Does preemptible RCU need the CPU to stay out of dynticks mode?
854  */
855 int rcu_preempt_needs_cpu(void)
856 {
857 	return rcu_preempt_ctrlblk.rcb.rcucblist != NULL;
858 }
859 
860 #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
861 
862 #ifdef CONFIG_RCU_TRACE
863 
864 /*
865  * Because preemptible RCU does not exist, it is not necessary to
866  * dump out its statistics.
867  */
868 static void show_tiny_preempt_stats(struct seq_file *m)
869 {
870 }
871 
872 #endif /* #ifdef CONFIG_RCU_TRACE */
873 
874 /*
875  * Because preemptible RCU does not exist, it never has any callbacks
876  * to check.
877  */
878 static void rcu_preempt_check_callbacks(void)
879 {
880 }
881 
882 /*
883  * Because preemptible RCU does not exist, it never has any callbacks
884  * to remove.
885  */
886 static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
887 {
888 }
889 
890 /*
891  * Because preemptible RCU does not exist, it never has any callbacks
892  * to process.
893  */
894 static void rcu_preempt_process_callbacks(void)
895 {
896 }
897 
898 #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */
899 
900 #ifdef CONFIG_RCU_BOOST
901 
902 /*
903  * Wake up rcu_kthread() to process callbacks now eligible for invocation
904  * or to boost readers.
905  */
906 static void invoke_rcu_callbacks(void)
907 {
908 	have_rcu_kthread_work = 1;
909 	if (rcu_kthread_task != NULL)
910 		wake_up(&rcu_kthread_wq);
911 }
912 
913 #ifdef CONFIG_RCU_TRACE
914 
915 /*
916  * Is the current CPU running the RCU-callbacks kthread?
917  * Caller must have preemption disabled.
918  */
919 static bool rcu_is_callbacks_kthread(void)
920 {
921 	return rcu_kthread_task == current;
922 }
923 
924 #endif /* #ifdef CONFIG_RCU_TRACE */
925 
926 /*
927  * This kthread invokes RCU callbacks whose grace periods have
928  * elapsed.  It is awakened as needed, and takes the place of the
929  * RCU_SOFTIRQ that is used for this purpose when boosting is disabled.
930  * This is a kthread, but it is never stopped, at least not until
931  * the system goes down.
932  */
933 static int rcu_kthread(void *arg)
934 {
935 	unsigned long work;
936 	unsigned long morework;
937 	unsigned long flags;
938 
939 	for (;;) {
940 		wait_event_interruptible(rcu_kthread_wq,
941 					 have_rcu_kthread_work != 0);
942 		morework = rcu_boost();
943 		local_irq_save(flags);
944 		work = have_rcu_kthread_work;
945 		have_rcu_kthread_work = morework;
946 		local_irq_restore(flags);
947 		if (work)
948 			rcu_process_callbacks(NULL);
949 		schedule_timeout_interruptible(1); /* Leave CPU for others. */
950 	}
951 
952 	return 0;  /* Not reached, but needed to shut gcc up. */
953 }
954 
955 /*
956  * Spawn the kthread that invokes RCU callbacks.
957  */
958 static int __init rcu_spawn_kthreads(void)
959 {
960 	struct sched_param sp;
961 
962 	rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread");
963 	sp.sched_priority = RCU_BOOST_PRIO;
964 	sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp);
965 	return 0;
966 }
967 early_initcall(rcu_spawn_kthreads);
968 
969 #else /* #ifdef CONFIG_RCU_BOOST */
970 
971 /* Hold off callback invocation until early_initcall() time. */
972 static int rcu_scheduler_fully_active __read_mostly;
973 
974 /*
975  * Start up softirq processing of callbacks.
976  */
977 void invoke_rcu_callbacks(void)
978 {
979 	if (rcu_scheduler_fully_active)
980 		raise_softirq(RCU_SOFTIRQ);
981 }
982 
983 #ifdef CONFIG_RCU_TRACE
984 
985 /*
986  * There is no callback kthread, so this thread is never it.
987  */
988 static bool rcu_is_callbacks_kthread(void)
989 {
990 	return false;
991 }
992 
993 #endif /* #ifdef CONFIG_RCU_TRACE */
994 
995 static int __init rcu_scheduler_really_started(void)
996 {
997 	rcu_scheduler_fully_active = 1;
998 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
999 	raise_softirq(RCU_SOFTIRQ);  /* Invoke any callbacks from early boot. */
1000 	return 0;
1001 }
1002 early_initcall(rcu_scheduler_really_started);
1003 
1004 #endif /* #else #ifdef CONFIG_RCU_BOOST */
1005 
1006 #ifdef CONFIG_DEBUG_LOCK_ALLOC
1007 #include <linux/kernel_stat.h>
1008 
1009 /*
1010  * During boot, we forgive RCU lockdep issues.  After this function is
1011  * invoked, we start taking RCU lockdep issues seriously.
1012  */
1013 void __init rcu_scheduler_starting(void)
1014 {
1015 	WARN_ON(nr_context_switches() > 0);
1016 	rcu_scheduler_active = 1;
1017 }
1018 
1019 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
1020 
1021 #ifdef CONFIG_RCU_TRACE
1022 
1023 #ifdef CONFIG_RCU_BOOST
1024 
1025 static void rcu_initiate_boost_trace(void)
1026 {
1027 	if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks))
1028 		rcu_preempt_ctrlblk.n_balk_blkd_tasks++;
1029 	else if (rcu_preempt_ctrlblk.gp_tasks == NULL &&
1030 		 rcu_preempt_ctrlblk.exp_tasks == NULL)
1031 		rcu_preempt_ctrlblk.n_balk_exp_gp_tasks++;
1032 	else if (rcu_preempt_ctrlblk.boost_tasks != NULL)
1033 		rcu_preempt_ctrlblk.n_balk_boost_tasks++;
1034 	else if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))
1035 		rcu_preempt_ctrlblk.n_balk_notyet++;
1036 	else
1037 		rcu_preempt_ctrlblk.n_balk_nos++;
1038 }
1039 
1040 #endif /* #ifdef CONFIG_RCU_BOOST */
1041 
1042 static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n)
1043 {
1044 	unsigned long flags;
1045 
1046 	local_irq_save(flags);
1047 	rcp->qlen -= n;
1048 	local_irq_restore(flags);
1049 }
1050 
1051 /*
1052  * Dump statistics for TINY_RCU, such as they are.
1053  */
1054 static int show_tiny_stats(struct seq_file *m, void *unused)
1055 {
1056 	show_tiny_preempt_stats(m);
1057 	seq_printf(m, "rcu_sched: qlen: %ld\n", rcu_sched_ctrlblk.qlen);
1058 	seq_printf(m, "rcu_bh: qlen: %ld\n", rcu_bh_ctrlblk.qlen);
1059 	return 0;
1060 }
1061 
1062 static int show_tiny_stats_open(struct inode *inode, struct file *file)
1063 {
1064 	return single_open(file, show_tiny_stats, NULL);
1065 }
1066 
1067 static const struct file_operations show_tiny_stats_fops = {
1068 	.owner = THIS_MODULE,
1069 	.open = show_tiny_stats_open,
1070 	.read = seq_read,
1071 	.llseek = seq_lseek,
1072 	.release = single_release,
1073 };
1074 
1075 static struct dentry *rcudir;
1076 
1077 static int __init rcutiny_trace_init(void)
1078 {
1079 	struct dentry *retval;
1080 
1081 	rcudir = debugfs_create_dir("rcu", NULL);
1082 	if (!rcudir)
1083 		goto free_out;
1084 	retval = debugfs_create_file("rcudata", 0444, rcudir,
1085 				     NULL, &show_tiny_stats_fops);
1086 	if (!retval)
1087 		goto free_out;
1088 	return 0;
1089 free_out:
1090 	debugfs_remove_recursive(rcudir);
1091 	return 1;
1092 }
1093 
1094 static void __exit rcutiny_trace_cleanup(void)
1095 {
1096 	debugfs_remove_recursive(rcudir);
1097 }
1098 
1099 module_init(rcutiny_trace_init);
1100 module_exit(rcutiny_trace_cleanup);
1101 
1102 MODULE_AUTHOR("Paul E. McKenney");
1103 MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation");
1104 MODULE_LICENSE("GPL");
1105 
1106 static void check_cpu_stall_preempt(void)
1107 {
1108 #ifdef CONFIG_TINY_PREEMPT_RCU
1109 	check_cpu_stall(&rcu_preempt_ctrlblk.rcb);
1110 #endif /* #ifdef CONFIG_TINY_PREEMPT_RCU */
1111 }
1112 
1113 #endif /* #ifdef CONFIG_RCU_TRACE */
1114