1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3 * RCU segmented callback lists, function definitions
4 *
5 * Copyright IBM Corporation, 2017
6 *
7 * Authors: Paul E. McKenney <paulmck@linux.ibm.com>
8 */
9
10 #include <linux/types.h>
11 #include <linux/kernel.h>
12 #include <linux/interrupt.h>
13 #include <linux/rcupdate.h>
14
15 #include "rcu_segcblist.h"
16
17 /* Initialize simple callback list. */
rcu_cblist_init(struct rcu_cblist * rclp)18 void rcu_cblist_init(struct rcu_cblist *rclp)
19 {
20 rclp->head = NULL;
21 rclp->tail = &rclp->head;
22 rclp->len = 0;
23 rclp->len_lazy = 0;
24 }
25
26 /*
27 * Enqueue an rcu_head structure onto the specified callback list.
28 * This function assumes that the callback is non-lazy because it
29 * is intended for use by no-CBs CPUs, which do not distinguish
30 * between lazy and non-lazy RCU callbacks.
31 */
rcu_cblist_enqueue(struct rcu_cblist * rclp,struct rcu_head * rhp)32 void rcu_cblist_enqueue(struct rcu_cblist *rclp, struct rcu_head *rhp)
33 {
34 *rclp->tail = rhp;
35 rclp->tail = &rhp->next;
36 WRITE_ONCE(rclp->len, rclp->len + 1);
37 }
38
39 /*
40 * Flush the second rcu_cblist structure onto the first one, obliterating
41 * any contents of the first. If rhp is non-NULL, enqueue it as the sole
42 * element of the second rcu_cblist structure, but ensuring that the second
43 * rcu_cblist structure, if initially non-empty, always appears non-empty
44 * throughout the process. If rdp is NULL, the second rcu_cblist structure
45 * is instead initialized to empty.
46 */
rcu_cblist_flush_enqueue(struct rcu_cblist * drclp,struct rcu_cblist * srclp,struct rcu_head * rhp)47 void rcu_cblist_flush_enqueue(struct rcu_cblist *drclp,
48 struct rcu_cblist *srclp,
49 struct rcu_head *rhp)
50 {
51 drclp->head = srclp->head;
52 if (drclp->head)
53 drclp->tail = srclp->tail;
54 else
55 drclp->tail = &drclp->head;
56 drclp->len = srclp->len;
57 drclp->len_lazy = srclp->len_lazy;
58 if (!rhp) {
59 rcu_cblist_init(srclp);
60 } else {
61 rhp->next = NULL;
62 srclp->head = rhp;
63 srclp->tail = &rhp->next;
64 WRITE_ONCE(srclp->len, 1);
65 srclp->len_lazy = 0;
66 }
67 }
68
69 /*
70 * Dequeue the oldest rcu_head structure from the specified callback
71 * list. This function assumes that the callback is non-lazy, but
72 * the caller can later invoke rcu_cblist_dequeued_lazy() if it
73 * finds otherwise (and if it cares about laziness). This allows
74 * different users to have different ways of determining laziness.
75 */
rcu_cblist_dequeue(struct rcu_cblist * rclp)76 struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp)
77 {
78 struct rcu_head *rhp;
79
80 rhp = rclp->head;
81 if (!rhp)
82 return NULL;
83 rclp->len--;
84 rclp->head = rhp->next;
85 if (!rclp->head)
86 rclp->tail = &rclp->head;
87 return rhp;
88 }
89
90 /* Set the length of an rcu_segcblist structure. */
rcu_segcblist_set_len(struct rcu_segcblist * rsclp,long v)91 void rcu_segcblist_set_len(struct rcu_segcblist *rsclp, long v)
92 {
93 #ifdef CONFIG_RCU_NOCB_CPU
94 atomic_long_set(&rsclp->len, v);
95 #else
96 WRITE_ONCE(rsclp->len, v);
97 #endif
98 }
99
100 /*
101 * Increase the numeric length of an rcu_segcblist structure by the
102 * specified amount, which can be negative. This can cause the ->len
103 * field to disagree with the actual number of callbacks on the structure.
104 * This increase is fully ordered with respect to the callers accesses
105 * both before and after.
106 */
rcu_segcblist_add_len(struct rcu_segcblist * rsclp,long v)107 void rcu_segcblist_add_len(struct rcu_segcblist *rsclp, long v)
108 {
109 #ifdef CONFIG_RCU_NOCB_CPU
110 smp_mb__before_atomic(); /* Up to the caller! */
111 atomic_long_add(v, &rsclp->len);
112 smp_mb__after_atomic(); /* Up to the caller! */
113 #else
114 smp_mb(); /* Up to the caller! */
115 WRITE_ONCE(rsclp->len, rsclp->len + v);
116 smp_mb(); /* Up to the caller! */
117 #endif
118 }
119
120 /*
121 * Increase the numeric length of an rcu_segcblist structure by one.
122 * This can cause the ->len field to disagree with the actual number of
123 * callbacks on the structure. This increase is fully ordered with respect
124 * to the callers accesses both before and after.
125 */
rcu_segcblist_inc_len(struct rcu_segcblist * rsclp)126 void rcu_segcblist_inc_len(struct rcu_segcblist *rsclp)
127 {
128 rcu_segcblist_add_len(rsclp, 1);
129 }
130
131 /*
132 * Exchange the numeric length of the specified rcu_segcblist structure
133 * with the specified value. This can cause the ->len field to disagree
134 * with the actual number of callbacks on the structure. This exchange is
135 * fully ordered with respect to the callers accesses both before and after.
136 */
rcu_segcblist_xchg_len(struct rcu_segcblist * rsclp,long v)137 long rcu_segcblist_xchg_len(struct rcu_segcblist *rsclp, long v)
138 {
139 #ifdef CONFIG_RCU_NOCB_CPU
140 return atomic_long_xchg(&rsclp->len, v);
141 #else
142 long ret = rsclp->len;
143
144 smp_mb(); /* Up to the caller! */
145 WRITE_ONCE(rsclp->len, v);
146 smp_mb(); /* Up to the caller! */
147 return ret;
148 #endif
149 }
150
151 /*
152 * Initialize an rcu_segcblist structure.
153 */
rcu_segcblist_init(struct rcu_segcblist * rsclp)154 void rcu_segcblist_init(struct rcu_segcblist *rsclp)
155 {
156 int i;
157
158 BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq));
159 BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq));
160 rsclp->head = NULL;
161 for (i = 0; i < RCU_CBLIST_NSEGS; i++)
162 rsclp->tails[i] = &rsclp->head;
163 rcu_segcblist_set_len(rsclp, 0);
164 rsclp->len_lazy = 0;
165 rsclp->enabled = 1;
166 }
167
168 /*
169 * Disable the specified rcu_segcblist structure, so that callbacks can
170 * no longer be posted to it. This structure must be empty.
171 */
rcu_segcblist_disable(struct rcu_segcblist * rsclp)172 void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
173 {
174 WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
175 WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
176 WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp));
177 rsclp->enabled = 0;
178 }
179
180 /*
181 * Mark the specified rcu_segcblist structure as offloaded. This
182 * structure must be empty.
183 */
rcu_segcblist_offload(struct rcu_segcblist * rsclp)184 void rcu_segcblist_offload(struct rcu_segcblist *rsclp)
185 {
186 rsclp->offloaded = 1;
187 }
188
189 /*
190 * Does the specified rcu_segcblist structure contain callbacks that
191 * are ready to be invoked?
192 */
rcu_segcblist_ready_cbs(struct rcu_segcblist * rsclp)193 bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp)
194 {
195 return rcu_segcblist_is_enabled(rsclp) &&
196 &rsclp->head != rsclp->tails[RCU_DONE_TAIL];
197 }
198
199 /*
200 * Does the specified rcu_segcblist structure contain callbacks that
201 * are still pending, that is, not yet ready to be invoked?
202 */
rcu_segcblist_pend_cbs(struct rcu_segcblist * rsclp)203 bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
204 {
205 return rcu_segcblist_is_enabled(rsclp) &&
206 !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL);
207 }
208
209 /*
210 * Return a pointer to the first callback in the specified rcu_segcblist
211 * structure. This is useful for diagnostics.
212 */
rcu_segcblist_first_cb(struct rcu_segcblist * rsclp)213 struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp)
214 {
215 if (rcu_segcblist_is_enabled(rsclp))
216 return rsclp->head;
217 return NULL;
218 }
219
220 /*
221 * Return a pointer to the first pending callback in the specified
222 * rcu_segcblist structure. This is useful just after posting a given
223 * callback -- if that callback is the first pending callback, then
224 * you cannot rely on someone else having already started up the required
225 * grace period.
226 */
rcu_segcblist_first_pend_cb(struct rcu_segcblist * rsclp)227 struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
228 {
229 if (rcu_segcblist_is_enabled(rsclp))
230 return *rsclp->tails[RCU_DONE_TAIL];
231 return NULL;
232 }
233
234 /*
235 * Return false if there are no CBs awaiting grace periods, otherwise,
236 * return true and store the nearest waited-upon grace period into *lp.
237 */
rcu_segcblist_nextgp(struct rcu_segcblist * rsclp,unsigned long * lp)238 bool rcu_segcblist_nextgp(struct rcu_segcblist *rsclp, unsigned long *lp)
239 {
240 if (!rcu_segcblist_pend_cbs(rsclp))
241 return false;
242 *lp = rsclp->gp_seq[RCU_WAIT_TAIL];
243 return true;
244 }
245
246 /*
247 * Enqueue the specified callback onto the specified rcu_segcblist
248 * structure, updating accounting as needed. Note that the ->len
249 * field may be accessed locklessly, hence the WRITE_ONCE().
250 * The ->len field is used by rcu_barrier() and friends to determine
251 * if it must post a callback on this structure, and it is OK
252 * for rcu_barrier() to sometimes post callbacks needlessly, but
253 * absolutely not OK for it to ever miss posting a callback.
254 */
rcu_segcblist_enqueue(struct rcu_segcblist * rsclp,struct rcu_head * rhp,bool lazy)255 void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
256 struct rcu_head *rhp, bool lazy)
257 {
258 rcu_segcblist_inc_len(rsclp);
259 if (lazy)
260 rsclp->len_lazy++;
261 smp_mb(); /* Ensure counts are updated before callback is enqueued. */
262 rhp->next = NULL;
263 WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rhp);
264 WRITE_ONCE(rsclp->tails[RCU_NEXT_TAIL], &rhp->next);
265 }
266
267 /*
268 * Entrain the specified callback onto the specified rcu_segcblist at
269 * the end of the last non-empty segment. If the entire rcu_segcblist
270 * is empty, make no change, but return false.
271 *
272 * This is intended for use by rcu_barrier()-like primitives, -not-
273 * for normal grace-period use. IMPORTANT: The callback you enqueue
274 * will wait for all prior callbacks, NOT necessarily for a grace
275 * period. You have been warned.
276 */
rcu_segcblist_entrain(struct rcu_segcblist * rsclp,struct rcu_head * rhp,bool lazy)277 bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
278 struct rcu_head *rhp, bool lazy)
279 {
280 int i;
281
282 if (rcu_segcblist_n_cbs(rsclp) == 0)
283 return false;
284 rcu_segcblist_inc_len(rsclp);
285 if (lazy)
286 rsclp->len_lazy++;
287 smp_mb(); /* Ensure counts are updated before callback is entrained. */
288 rhp->next = NULL;
289 for (i = RCU_NEXT_TAIL; i > RCU_DONE_TAIL; i--)
290 if (rsclp->tails[i] != rsclp->tails[i - 1])
291 break;
292 WRITE_ONCE(*rsclp->tails[i], rhp);
293 for (; i <= RCU_NEXT_TAIL; i++)
294 WRITE_ONCE(rsclp->tails[i], &rhp->next);
295 return true;
296 }
297
298 /*
299 * Extract only the counts from the specified rcu_segcblist structure,
300 * and place them in the specified rcu_cblist structure. This function
301 * supports both callback orphaning and invocation, hence the separation
302 * of counts and callbacks. (Callbacks ready for invocation must be
303 * orphaned and adopted separately from pending callbacks, but counts
304 * apply to all callbacks. Locking must be used to make sure that
305 * both orphaned-callbacks lists are consistent.)
306 */
rcu_segcblist_extract_count(struct rcu_segcblist * rsclp,struct rcu_cblist * rclp)307 void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
308 struct rcu_cblist *rclp)
309 {
310 rclp->len_lazy += rsclp->len_lazy;
311 rsclp->len_lazy = 0;
312 rclp->len = rcu_segcblist_xchg_len(rsclp, 0);
313 }
314
315 /*
316 * Extract only those callbacks ready to be invoked from the specified
317 * rcu_segcblist structure and place them in the specified rcu_cblist
318 * structure.
319 */
rcu_segcblist_extract_done_cbs(struct rcu_segcblist * rsclp,struct rcu_cblist * rclp)320 void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
321 struct rcu_cblist *rclp)
322 {
323 int i;
324
325 if (!rcu_segcblist_ready_cbs(rsclp))
326 return; /* Nothing to do. */
327 *rclp->tail = rsclp->head;
328 WRITE_ONCE(rsclp->head, *rsclp->tails[RCU_DONE_TAIL]);
329 WRITE_ONCE(*rsclp->tails[RCU_DONE_TAIL], NULL);
330 rclp->tail = rsclp->tails[RCU_DONE_TAIL];
331 for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--)
332 if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL])
333 WRITE_ONCE(rsclp->tails[i], &rsclp->head);
334 }
335
336 /*
337 * Extract only those callbacks still pending (not yet ready to be
338 * invoked) from the specified rcu_segcblist structure and place them in
339 * the specified rcu_cblist structure. Note that this loses information
340 * about any callbacks that might have been partway done waiting for
341 * their grace period. Too bad! They will have to start over.
342 */
rcu_segcblist_extract_pend_cbs(struct rcu_segcblist * rsclp,struct rcu_cblist * rclp)343 void rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
344 struct rcu_cblist *rclp)
345 {
346 int i;
347
348 if (!rcu_segcblist_pend_cbs(rsclp))
349 return; /* Nothing to do. */
350 *rclp->tail = *rsclp->tails[RCU_DONE_TAIL];
351 rclp->tail = rsclp->tails[RCU_NEXT_TAIL];
352 WRITE_ONCE(*rsclp->tails[RCU_DONE_TAIL], NULL);
353 for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++)
354 WRITE_ONCE(rsclp->tails[i], rsclp->tails[RCU_DONE_TAIL]);
355 }
356
357 /*
358 * Insert counts from the specified rcu_cblist structure in the
359 * specified rcu_segcblist structure.
360 */
rcu_segcblist_insert_count(struct rcu_segcblist * rsclp,struct rcu_cblist * rclp)361 void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
362 struct rcu_cblist *rclp)
363 {
364 rsclp->len_lazy += rclp->len_lazy;
365 rcu_segcblist_add_len(rsclp, rclp->len);
366 rclp->len_lazy = 0;
367 rclp->len = 0;
368 }
369
370 /*
371 * Move callbacks from the specified rcu_cblist to the beginning of the
372 * done-callbacks segment of the specified rcu_segcblist.
373 */
rcu_segcblist_insert_done_cbs(struct rcu_segcblist * rsclp,struct rcu_cblist * rclp)374 void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
375 struct rcu_cblist *rclp)
376 {
377 int i;
378
379 if (!rclp->head)
380 return; /* No callbacks to move. */
381 *rclp->tail = rsclp->head;
382 WRITE_ONCE(rsclp->head, rclp->head);
383 for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++)
384 if (&rsclp->head == rsclp->tails[i])
385 WRITE_ONCE(rsclp->tails[i], rclp->tail);
386 else
387 break;
388 rclp->head = NULL;
389 rclp->tail = &rclp->head;
390 }
391
392 /*
393 * Move callbacks from the specified rcu_cblist to the end of the
394 * new-callbacks segment of the specified rcu_segcblist.
395 */
rcu_segcblist_insert_pend_cbs(struct rcu_segcblist * rsclp,struct rcu_cblist * rclp)396 void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
397 struct rcu_cblist *rclp)
398 {
399 if (!rclp->head)
400 return; /* Nothing to do. */
401 WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rclp->head);
402 WRITE_ONCE(rsclp->tails[RCU_NEXT_TAIL], rclp->tail);
403 rclp->head = NULL;
404 rclp->tail = &rclp->head;
405 }
406
407 /*
408 * Advance the callbacks in the specified rcu_segcblist structure based
409 * on the current value passed in for the grace-period counter.
410 */
rcu_segcblist_advance(struct rcu_segcblist * rsclp,unsigned long seq)411 void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq)
412 {
413 int i, j;
414
415 WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
416 if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
417 return;
418
419 /*
420 * Find all callbacks whose ->gp_seq numbers indicate that they
421 * are ready to invoke, and put them into the RCU_DONE_TAIL segment.
422 */
423 for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
424 if (ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
425 break;
426 WRITE_ONCE(rsclp->tails[RCU_DONE_TAIL], rsclp->tails[i]);
427 }
428
429 /* If no callbacks moved, nothing more need be done. */
430 if (i == RCU_WAIT_TAIL)
431 return;
432
433 /* Clean up tail pointers that might have been misordered above. */
434 for (j = RCU_WAIT_TAIL; j < i; j++)
435 WRITE_ONCE(rsclp->tails[j], rsclp->tails[RCU_DONE_TAIL]);
436
437 /*
438 * Callbacks moved, so clean up the misordered ->tails[] pointers
439 * that now point into the middle of the list of ready-to-invoke
440 * callbacks. The overall effect is to copy down the later pointers
441 * into the gap that was created by the now-ready segments.
442 */
443 for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
444 if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
445 break; /* No more callbacks. */
446 WRITE_ONCE(rsclp->tails[j], rsclp->tails[i]);
447 rsclp->gp_seq[j] = rsclp->gp_seq[i];
448 }
449 }
450
451 /*
452 * "Accelerate" callbacks based on more-accurate grace-period information.
453 * The reason for this is that RCU does not synchronize the beginnings and
454 * ends of grace periods, and that callbacks are posted locally. This in
455 * turn means that the callbacks must be labelled conservatively early
456 * on, as getting exact information would degrade both performance and
457 * scalability. When more accurate grace-period information becomes
458 * available, previously posted callbacks can be "accelerated", marking
459 * them to complete at the end of the earlier grace period.
460 *
461 * This function operates on an rcu_segcblist structure, and also the
462 * grace-period sequence number seq at which new callbacks would become
463 * ready to invoke. Returns true if there are callbacks that won't be
464 * ready to invoke until seq, false otherwise.
465 */
rcu_segcblist_accelerate(struct rcu_segcblist * rsclp,unsigned long seq)466 bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq)
467 {
468 int i;
469
470 WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
471 if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
472 return false;
473
474 /*
475 * Find the segment preceding the oldest segment of callbacks
476 * whose ->gp_seq[] completion is at or after that passed in via
477 * "seq", skipping any empty segments. This oldest segment, along
478 * with any later segments, can be merged in with any newly arrived
479 * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq"
480 * as their ->gp_seq[] grace-period completion sequence number.
481 */
482 for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--)
483 if (rsclp->tails[i] != rsclp->tails[i - 1] &&
484 ULONG_CMP_LT(rsclp->gp_seq[i], seq))
485 break;
486
487 /*
488 * If all the segments contain callbacks that correspond to
489 * earlier grace-period sequence numbers than "seq", leave.
490 * Assuming that the rcu_segcblist structure has enough
491 * segments in its arrays, this can only happen if some of
492 * the non-done segments contain callbacks that really are
493 * ready to invoke. This situation will get straightened
494 * out by the next call to rcu_segcblist_advance().
495 *
496 * Also advance to the oldest segment of callbacks whose
497 * ->gp_seq[] completion is at or after that passed in via "seq",
498 * skipping any empty segments.
499 */
500 if (++i >= RCU_NEXT_TAIL)
501 return false;
502
503 /*
504 * Merge all later callbacks, including newly arrived callbacks,
505 * into the segment located by the for-loop above. Assign "seq"
506 * as the ->gp_seq[] value in order to correctly handle the case
507 * where there were no pending callbacks in the rcu_segcblist
508 * structure other than in the RCU_NEXT_TAIL segment.
509 */
510 for (; i < RCU_NEXT_TAIL; i++) {
511 WRITE_ONCE(rsclp->tails[i], rsclp->tails[RCU_NEXT_TAIL]);
512 rsclp->gp_seq[i] = seq;
513 }
514 return true;
515 }
516
517 /*
518 * Merge the source rcu_segcblist structure into the destination
519 * rcu_segcblist structure, then initialize the source. Any pending
520 * callbacks from the source get to start over. It is best to
521 * advance and accelerate both the destination and the source
522 * before merging.
523 */
rcu_segcblist_merge(struct rcu_segcblist * dst_rsclp,struct rcu_segcblist * src_rsclp)524 void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
525 struct rcu_segcblist *src_rsclp)
526 {
527 struct rcu_cblist donecbs;
528 struct rcu_cblist pendcbs;
529
530 rcu_cblist_init(&donecbs);
531 rcu_cblist_init(&pendcbs);
532 rcu_segcblist_extract_count(src_rsclp, &donecbs);
533 rcu_segcblist_extract_done_cbs(src_rsclp, &donecbs);
534 rcu_segcblist_extract_pend_cbs(src_rsclp, &pendcbs);
535 rcu_segcblist_insert_count(dst_rsclp, &donecbs);
536 rcu_segcblist_insert_done_cbs(dst_rsclp, &donecbs);
537 rcu_segcblist_insert_pend_cbs(dst_rsclp, &pendcbs);
538 rcu_segcblist_init(src_rsclp);
539 }
540