1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * KCSAN core runtime.
4 *
5 * Copyright (C) 2019, Google LLC.
6 */
7
8 #define pr_fmt(fmt) "kcsan: " fmt
9
10 #include <linux/atomic.h>
11 #include <linux/bug.h>
12 #include <linux/delay.h>
13 #include <linux/export.h>
14 #include <linux/init.h>
15 #include <linux/kernel.h>
16 #include <linux/list.h>
17 #include <linux/minmax.h>
18 #include <linux/moduleparam.h>
19 #include <linux/percpu.h>
20 #include <linux/preempt.h>
21 #include <linux/sched.h>
22 #include <linux/string.h>
23 #include <linux/uaccess.h>
24
25 #include "encoding.h"
26 #include "kcsan.h"
27 #include "permissive.h"
28
29 static bool kcsan_early_enable = IS_ENABLED(CONFIG_KCSAN_EARLY_ENABLE);
30 unsigned int kcsan_udelay_task = CONFIG_KCSAN_UDELAY_TASK;
31 unsigned int kcsan_udelay_interrupt = CONFIG_KCSAN_UDELAY_INTERRUPT;
32 static long kcsan_skip_watch = CONFIG_KCSAN_SKIP_WATCH;
33 static bool kcsan_interrupt_watcher = IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER);
34
35 #ifdef MODULE_PARAM_PREFIX
36 #undef MODULE_PARAM_PREFIX
37 #endif
38 #define MODULE_PARAM_PREFIX "kcsan."
39 module_param_named(early_enable, kcsan_early_enable, bool, 0);
40 module_param_named(udelay_task, kcsan_udelay_task, uint, 0644);
41 module_param_named(udelay_interrupt, kcsan_udelay_interrupt, uint, 0644);
42 module_param_named(skip_watch, kcsan_skip_watch, long, 0644);
43 module_param_named(interrupt_watcher, kcsan_interrupt_watcher, bool, 0444);
44
45 bool kcsan_enabled;
46
47 /* Per-CPU kcsan_ctx for interrupts */
48 static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = {
49 .disable_count = 0,
50 .atomic_next = 0,
51 .atomic_nest_count = 0,
52 .in_flat_atomic = false,
53 .access_mask = 0,
54 .scoped_accesses = {LIST_POISON1, NULL},
55 };
56
/*
 * Helper macros to index into adjacent slots, starting from address slot
 * itself, followed by the right and left slots.
 *
 * The purpose is 2-fold:
 *
 *	1. if during insertion the address slot is already occupied, check if
 *	   any adjacent slots are free;
 *	2. accesses that straddle a slot boundary due to size that exceeds a
 *	   slot's range may check adjacent slots if any watchpoint matches.
 *
 * Note that accesses with very large size may still miss a watchpoint; however,
 * given this should be rare, this is a reasonable trade-off to make, since this
 * will avoid:
 *
 *	1. excessive contention between watchpoint checks and setup;
 *	2. larger number of simultaneous watchpoints without sacrificing
 *	   performance.
 *
 * Example: SLOT_IDX values for KCSAN_CHECK_ADJACENT=1, where i is [0, 1, 2]:
 *
 *   slot=0:  [ 1,  2,  0]
 *   slot=9:  [10, 11,  9]
 *   slot=63: [64, 65, 63]
 *
 * Both arguments are parenthesized so that expression arguments (e.g.
 * "n - 1", "1 << k", "a & b") are evaluated as a unit, independent of the
 * precedence of the operators used inside the macro.
 */
#define SLOT_IDX(slot, i) ((slot) + (((i) + KCSAN_CHECK_ADJACENT) % NUM_SLOTS))
83
/*
 * SLOT_IDX_FAST is used in the fast-path. Not first checking the address's primary
 * slot (middle) is fine if we assume that races occur rarely. The set of
 * indices {SLOT_IDX(slot, i) | i in [0, NUM_SLOTS)} is equivalent to
 * {SLOT_IDX_FAST(slot, i) | i in [0, NUM_SLOTS)}.
 *
 * Arguments are parenthesized so that expression arguments are summed as
 * whole values rather than being re-associated by operator precedence.
 */
#define SLOT_IDX_FAST(slot, i) ((slot) + (i))
91
92 /*
93 * Watchpoints, with each entry encoded as defined in encoding.h: in order to be
94 * able to safely update and access a watchpoint without introducing locking
95 * overhead, we encode each watchpoint as a single atomic long. The initial
96 * zero-initialized state matches INVALID_WATCHPOINT.
97 *
98 * Add NUM_SLOTS-1 entries to account for overflow; this helps avoid having to
99 * use more complicated SLOT_IDX_FAST calculation with modulo in the fast-path.
100 */
101 static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1];
102
103 /*
104 * Instructions to skip watching counter, used in should_watch(). We use a
105 * per-CPU counter to avoid excessive contention.
106 */
107 static DEFINE_PER_CPU(long, kcsan_skip);
108
109 /* For kcsan_prandom_u32_max(). */
110 static DEFINE_PER_CPU(u32, kcsan_rand_state);
111
find_watchpoint(unsigned long addr,size_t size,bool expect_write,long * encoded_watchpoint)112 static __always_inline atomic_long_t *find_watchpoint(unsigned long addr,
113 size_t size,
114 bool expect_write,
115 long *encoded_watchpoint)
116 {
117 const int slot = watchpoint_slot(addr);
118 const unsigned long addr_masked = addr & WATCHPOINT_ADDR_MASK;
119 atomic_long_t *watchpoint;
120 unsigned long wp_addr_masked;
121 size_t wp_size;
122 bool is_write;
123 int i;
124
125 BUILD_BUG_ON(CONFIG_KCSAN_NUM_WATCHPOINTS < NUM_SLOTS);
126
127 for (i = 0; i < NUM_SLOTS; ++i) {
128 watchpoint = &watchpoints[SLOT_IDX_FAST(slot, i)];
129 *encoded_watchpoint = atomic_long_read(watchpoint);
130 if (!decode_watchpoint(*encoded_watchpoint, &wp_addr_masked,
131 &wp_size, &is_write))
132 continue;
133
134 if (expect_write && !is_write)
135 continue;
136
137 /* Check if the watchpoint matches the access. */
138 if (matching_access(wp_addr_masked, wp_size, addr_masked, size))
139 return watchpoint;
140 }
141
142 return NULL;
143 }
144
145 static inline atomic_long_t *
insert_watchpoint(unsigned long addr,size_t size,bool is_write)146 insert_watchpoint(unsigned long addr, size_t size, bool is_write)
147 {
148 const int slot = watchpoint_slot(addr);
149 const long encoded_watchpoint = encode_watchpoint(addr, size, is_write);
150 atomic_long_t *watchpoint;
151 int i;
152
153 /* Check slot index logic, ensuring we stay within array bounds. */
154 BUILD_BUG_ON(SLOT_IDX(0, 0) != KCSAN_CHECK_ADJACENT);
155 BUILD_BUG_ON(SLOT_IDX(0, KCSAN_CHECK_ADJACENT+1) != 0);
156 BUILD_BUG_ON(SLOT_IDX(CONFIG_KCSAN_NUM_WATCHPOINTS-1, KCSAN_CHECK_ADJACENT) != ARRAY_SIZE(watchpoints)-1);
157 BUILD_BUG_ON(SLOT_IDX(CONFIG_KCSAN_NUM_WATCHPOINTS-1, KCSAN_CHECK_ADJACENT+1) != ARRAY_SIZE(watchpoints) - NUM_SLOTS);
158
159 for (i = 0; i < NUM_SLOTS; ++i) {
160 long expect_val = INVALID_WATCHPOINT;
161
162 /* Try to acquire this slot. */
163 watchpoint = &watchpoints[SLOT_IDX(slot, i)];
164 if (atomic_long_try_cmpxchg_relaxed(watchpoint, &expect_val, encoded_watchpoint))
165 return watchpoint;
166 }
167
168 return NULL;
169 }
170
171 /*
172 * Return true if watchpoint was successfully consumed, false otherwise.
173 *
174 * This may return false if:
175 *
176 * 1. another thread already consumed the watchpoint;
177 * 2. the thread that set up the watchpoint already removed it;
178 * 3. the watchpoint was removed and then re-used.
179 */
180 static __always_inline bool
try_consume_watchpoint(atomic_long_t * watchpoint,long encoded_watchpoint)181 try_consume_watchpoint(atomic_long_t *watchpoint, long encoded_watchpoint)
182 {
183 return atomic_long_try_cmpxchg_relaxed(watchpoint, &encoded_watchpoint, CONSUMED_WATCHPOINT);
184 }
185
186 /* Return true if watchpoint was not touched, false if already consumed. */
consume_watchpoint(atomic_long_t * watchpoint)187 static inline bool consume_watchpoint(atomic_long_t *watchpoint)
188 {
189 return atomic_long_xchg_relaxed(watchpoint, CONSUMED_WATCHPOINT) != CONSUMED_WATCHPOINT;
190 }
191
192 /* Remove the watchpoint -- its slot may be reused after. */
remove_watchpoint(atomic_long_t * watchpoint)193 static inline void remove_watchpoint(atomic_long_t *watchpoint)
194 {
195 atomic_long_set(watchpoint, INVALID_WATCHPOINT);
196 }
197
get_ctx(void)198 static __always_inline struct kcsan_ctx *get_ctx(void)
199 {
200 /*
201 * In interrupts, use raw_cpu_ptr to avoid unnecessary checks, that would
202 * also result in calls that generate warnings in uaccess regions.
203 */
204 return in_task() ? ¤t->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx);
205 }
206
207 /* Check scoped accesses; never inline because this is a slow-path! */
kcsan_check_scoped_accesses(void)208 static noinline void kcsan_check_scoped_accesses(void)
209 {
210 struct kcsan_ctx *ctx = get_ctx();
211 struct list_head *prev_save = ctx->scoped_accesses.prev;
212 struct kcsan_scoped_access *scoped_access;
213
214 ctx->scoped_accesses.prev = NULL; /* Avoid recursion. */
215 list_for_each_entry(scoped_access, &ctx->scoped_accesses, list)
216 __kcsan_check_access(scoped_access->ptr, scoped_access->size, scoped_access->type);
217 ctx->scoped_accesses.prev = prev_save;
218 }
219
220 /* Rules for generic atomic accesses. Called from fast-path. */
221 static __always_inline bool
is_atomic(const volatile void * ptr,size_t size,int type,struct kcsan_ctx * ctx)222 is_atomic(const volatile void *ptr, size_t size, int type, struct kcsan_ctx *ctx)
223 {
224 if (type & KCSAN_ACCESS_ATOMIC)
225 return true;
226
227 /*
228 * Unless explicitly declared atomic, never consider an assertion access
229 * as atomic. This allows using them also in atomic regions, such as
230 * seqlocks, without implicitly changing their semantics.
231 */
232 if (type & KCSAN_ACCESS_ASSERT)
233 return false;
234
235 if (IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC) &&
236 (type & KCSAN_ACCESS_WRITE) && size <= sizeof(long) &&
237 !(type & KCSAN_ACCESS_COMPOUND) && IS_ALIGNED((unsigned long)ptr, size))
238 return true; /* Assume aligned writes up to word size are atomic. */
239
240 if (ctx->atomic_next > 0) {
241 /*
242 * Because we do not have separate contexts for nested
243 * interrupts, in case atomic_next is set, we simply assume that
244 * the outer interrupt set atomic_next. In the worst case, we
245 * will conservatively consider operations as atomic. This is a
246 * reasonable trade-off to make, since this case should be
247 * extremely rare; however, even if extremely rare, it could
248 * lead to false positives otherwise.
249 */
250 if ((hardirq_count() >> HARDIRQ_SHIFT) < 2)
251 --ctx->atomic_next; /* in task, or outer interrupt */
252 return true;
253 }
254
255 return ctx->atomic_nest_count > 0 || ctx->in_flat_atomic;
256 }
257
258 static __always_inline bool
should_watch(const volatile void * ptr,size_t size,int type,struct kcsan_ctx * ctx)259 should_watch(const volatile void *ptr, size_t size, int type, struct kcsan_ctx *ctx)
260 {
261 /*
262 * Never set up watchpoints when memory operations are atomic.
263 *
264 * Need to check this first, before kcsan_skip check below: (1) atomics
265 * should not count towards skipped instructions, and (2) to actually
266 * decrement kcsan_atomic_next for consecutive instruction stream.
267 */
268 if (is_atomic(ptr, size, type, ctx))
269 return false;
270
271 if (this_cpu_dec_return(kcsan_skip) >= 0)
272 return false;
273
274 /*
275 * NOTE: If we get here, kcsan_skip must always be reset in slow path
276 * via reset_kcsan_skip() to avoid underflow.
277 */
278
279 /* this operation should be watched */
280 return true;
281 }
282
283 /*
284 * Returns a pseudo-random number in interval [0, ep_ro). Simple linear
285 * congruential generator, using constants from "Numerical Recipes".
286 */
kcsan_prandom_u32_max(u32 ep_ro)287 static u32 kcsan_prandom_u32_max(u32 ep_ro)
288 {
289 u32 state = this_cpu_read(kcsan_rand_state);
290
291 state = 1664525 * state + 1013904223;
292 this_cpu_write(kcsan_rand_state, state);
293
294 return state % ep_ro;
295 }
296
reset_kcsan_skip(void)297 static inline void reset_kcsan_skip(void)
298 {
299 long skip_count = kcsan_skip_watch -
300 (IS_ENABLED(CONFIG_KCSAN_SKIP_WATCH_RANDOMIZE) ?
301 kcsan_prandom_u32_max(kcsan_skip_watch) :
302 0);
303 this_cpu_write(kcsan_skip, skip_count);
304 }
305
kcsan_is_enabled(struct kcsan_ctx * ctx)306 static __always_inline bool kcsan_is_enabled(struct kcsan_ctx *ctx)
307 {
308 return READ_ONCE(kcsan_enabled) && !ctx->disable_count;
309 }
310
311 /* Introduce delay depending on context and configuration. */
delay_access(int type)312 static void delay_access(int type)
313 {
314 unsigned int delay = in_task() ? kcsan_udelay_task : kcsan_udelay_interrupt;
315 /* For certain access types, skew the random delay to be longer. */
316 unsigned int skew_delay_order =
317 (type & (KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_ASSERT)) ? 1 : 0;
318
319 delay -= IS_ENABLED(CONFIG_KCSAN_DELAY_RANDOMIZE) ?
320 kcsan_prandom_u32_max(delay >> skew_delay_order) :
321 0;
322 udelay(delay);
323 }
324
/*
 * Stash @task's IRQ trace state in task->kcsan_save_irqtrace, to be put back
 * by kcsan_restore_irqtrace(). Does nothing without CONFIG_TRACE_IRQFLAGS.
 */
void kcsan_save_irqtrace(struct task_struct *task)
{
#if defined(CONFIG_TRACE_IRQFLAGS)
	task->kcsan_save_irqtrace = task->irqtrace;
#endif
}
331
/*
 * Put back the IRQ trace state of @task that kcsan_save_irqtrace() stashed in
 * task->kcsan_save_irqtrace. Does nothing without CONFIG_TRACE_IRQFLAGS.
 */
void kcsan_restore_irqtrace(struct task_struct *task)
{
#if defined(CONFIG_TRACE_IRQFLAGS)
	task->irqtrace = task->kcsan_save_irqtrace;
#endif
}
338
339 /*
340 * Pull everything together: check_access() below contains the performance
341 * critical operations; the fast-path (including check_access) functions should
342 * all be inlinable by the instrumentation functions.
343 *
344 * The slow-path (kcsan_found_watchpoint, kcsan_setup_watchpoint) are
345 * non-inlinable -- note that, we prefix these with "kcsan_" to ensure they can
346 * be filtered from the stacktrace, as well as give them unique names for the
347 * UACCESS whitelist of objtool. Each function uses user_access_save/restore(),
348 * since they do not access any user memory, but instrumentation is still
349 * emitted in UACCESS regions.
350 */
351
kcsan_found_watchpoint(const volatile void * ptr,size_t size,int type,atomic_long_t * watchpoint,long encoded_watchpoint)352 static noinline void kcsan_found_watchpoint(const volatile void *ptr,
353 size_t size,
354 int type,
355 atomic_long_t *watchpoint,
356 long encoded_watchpoint)
357 {
358 const bool is_assert = (type & KCSAN_ACCESS_ASSERT) != 0;
359 struct kcsan_ctx *ctx = get_ctx();
360 unsigned long flags;
361 bool consumed;
362
363 /*
364 * We know a watchpoint exists. Let's try to keep the race-window
365 * between here and finally consuming the watchpoint below as small as
366 * possible -- avoid unneccessarily complex code until consumed.
367 */
368
369 if (!kcsan_is_enabled(ctx))
370 return;
371
372 /*
373 * The access_mask check relies on value-change comparison. To avoid
374 * reporting a race where e.g. the writer set up the watchpoint, but the
375 * reader has access_mask!=0, we have to ignore the found watchpoint.
376 */
377 if (ctx->access_mask)
378 return;
379
380 /*
381 * If the other thread does not want to ignore the access, and there was
382 * a value change as a result of this thread's operation, we will still
383 * generate a report of unknown origin.
384 *
385 * Use CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN=n to filter.
386 */
387 if (!is_assert && kcsan_ignore_address(ptr))
388 return;
389
390 /*
391 * Consuming the watchpoint must be guarded by kcsan_is_enabled() to
392 * avoid erroneously triggering reports if the context is disabled.
393 */
394 consumed = try_consume_watchpoint(watchpoint, encoded_watchpoint);
395
396 /* keep this after try_consume_watchpoint */
397 flags = user_access_save();
398
399 if (consumed) {
400 kcsan_save_irqtrace(current);
401 kcsan_report_set_info(ptr, size, type, watchpoint - watchpoints);
402 kcsan_restore_irqtrace(current);
403 } else {
404 /*
405 * The other thread may not print any diagnostics, as it has
406 * already removed the watchpoint, or another thread consumed
407 * the watchpoint before this thread.
408 */
409 atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_REPORT_RACES]);
410 }
411
412 if (is_assert)
413 atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ASSERT_FAILURES]);
414 else
415 atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_DATA_RACES]);
416
417 user_access_restore(flags);
418 }
419
420 static noinline void
kcsan_setup_watchpoint(const volatile void * ptr,size_t size,int type)421 kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
422 {
423 const bool is_write = (type & KCSAN_ACCESS_WRITE) != 0;
424 const bool is_assert = (type & KCSAN_ACCESS_ASSERT) != 0;
425 atomic_long_t *watchpoint;
426 u64 old, new, diff;
427 unsigned long access_mask;
428 enum kcsan_value_change value_change = KCSAN_VALUE_CHANGE_MAYBE;
429 unsigned long ua_flags = user_access_save();
430 struct kcsan_ctx *ctx = get_ctx();
431 unsigned long irq_flags = 0;
432
433 /*
434 * Always reset kcsan_skip counter in slow-path to avoid underflow; see
435 * should_watch().
436 */
437 reset_kcsan_skip();
438
439 if (!kcsan_is_enabled(ctx))
440 goto out;
441
442 /*
443 * Check to-ignore addresses after kcsan_is_enabled(), as we may access
444 * memory that is not yet initialized during early boot.
445 */
446 if (!is_assert && kcsan_ignore_address(ptr))
447 goto out;
448
449 if (!check_encodable((unsigned long)ptr, size)) {
450 atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_UNENCODABLE_ACCESSES]);
451 goto out;
452 }
453
454 /*
455 * Save and restore the IRQ state trace touched by KCSAN, since KCSAN's
456 * runtime is entered for every memory access, and potentially useful
457 * information is lost if dirtied by KCSAN.
458 */
459 kcsan_save_irqtrace(current);
460 if (!kcsan_interrupt_watcher)
461 local_irq_save(irq_flags);
462
463 watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write);
464 if (watchpoint == NULL) {
465 /*
466 * Out of capacity: the size of 'watchpoints', and the frequency
467 * with which should_watch() returns true should be tweaked so
468 * that this case happens very rarely.
469 */
470 atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_NO_CAPACITY]);
471 goto out_unlock;
472 }
473
474 atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_SETUP_WATCHPOINTS]);
475 atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_USED_WATCHPOINTS]);
476
477 /*
478 * Read the current value, to later check and infer a race if the data
479 * was modified via a non-instrumented access, e.g. from a device.
480 */
481 old = 0;
482 switch (size) {
483 case 1:
484 old = READ_ONCE(*(const u8 *)ptr);
485 break;
486 case 2:
487 old = READ_ONCE(*(const u16 *)ptr);
488 break;
489 case 4:
490 old = READ_ONCE(*(const u32 *)ptr);
491 break;
492 case 8:
493 old = READ_ONCE(*(const u64 *)ptr);
494 break;
495 default:
496 break; /* ignore; we do not diff the values */
497 }
498
499 /*
500 * Delay this thread, to increase probability of observing a racy
501 * conflicting access.
502 */
503 delay_access(type);
504
505 /*
506 * Re-read value, and check if it is as expected; if not, we infer a
507 * racy access.
508 */
509 access_mask = ctx->access_mask;
510 new = 0;
511 switch (size) {
512 case 1:
513 new = READ_ONCE(*(const u8 *)ptr);
514 break;
515 case 2:
516 new = READ_ONCE(*(const u16 *)ptr);
517 break;
518 case 4:
519 new = READ_ONCE(*(const u32 *)ptr);
520 break;
521 case 8:
522 new = READ_ONCE(*(const u64 *)ptr);
523 break;
524 default:
525 break; /* ignore; we do not diff the values */
526 }
527
528 diff = old ^ new;
529 if (access_mask)
530 diff &= access_mask;
531
532 /*
533 * Check if we observed a value change.
534 *
535 * Also check if the data race should be ignored (the rules depend on
536 * non-zero diff); if it is to be ignored, the below rules for
537 * KCSAN_VALUE_CHANGE_MAYBE apply.
538 */
539 if (diff && !kcsan_ignore_data_race(size, type, old, new, diff))
540 value_change = KCSAN_VALUE_CHANGE_TRUE;
541
542 /* Check if this access raced with another. */
543 if (!consume_watchpoint(watchpoint)) {
544 /*
545 * Depending on the access type, map a value_change of MAYBE to
546 * TRUE (always report) or FALSE (never report).
547 */
548 if (value_change == KCSAN_VALUE_CHANGE_MAYBE) {
549 if (access_mask != 0) {
550 /*
551 * For access with access_mask, we require a
552 * value-change, as it is likely that races on
553 * ~access_mask bits are expected.
554 */
555 value_change = KCSAN_VALUE_CHANGE_FALSE;
556 } else if (size > 8 || is_assert) {
557 /* Always assume a value-change. */
558 value_change = KCSAN_VALUE_CHANGE_TRUE;
559 }
560 }
561
562 /*
563 * No need to increment 'data_races' counter, as the racing
564 * thread already did.
565 *
566 * Count 'assert_failures' for each failed ASSERT access,
567 * therefore both this thread and the racing thread may
568 * increment this counter.
569 */
570 if (is_assert && value_change == KCSAN_VALUE_CHANGE_TRUE)
571 atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ASSERT_FAILURES]);
572
573 kcsan_report_known_origin(ptr, size, type, value_change,
574 watchpoint - watchpoints,
575 old, new, access_mask);
576 } else if (value_change == KCSAN_VALUE_CHANGE_TRUE) {
577 /* Inferring a race, since the value should not have changed. */
578
579 atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN]);
580 if (is_assert)
581 atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ASSERT_FAILURES]);
582
583 if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN) || is_assert)
584 kcsan_report_unknown_origin(ptr, size, type, old, new, access_mask);
585 }
586
587 /*
588 * Remove watchpoint; must be after reporting, since the slot may be
589 * reused after this point.
590 */
591 remove_watchpoint(watchpoint);
592 atomic_long_dec(&kcsan_counters[KCSAN_COUNTER_USED_WATCHPOINTS]);
593 out_unlock:
594 if (!kcsan_interrupt_watcher)
595 local_irq_restore(irq_flags);
596 kcsan_restore_irqtrace(current);
597 out:
598 user_access_restore(ua_flags);
599 }
600
check_access(const volatile void * ptr,size_t size,int type)601 static __always_inline void check_access(const volatile void *ptr, size_t size,
602 int type)
603 {
604 const bool is_write = (type & KCSAN_ACCESS_WRITE) != 0;
605 atomic_long_t *watchpoint;
606 long encoded_watchpoint;
607
608 /*
609 * Do nothing for 0 sized check; this comparison will be optimized out
610 * for constant sized instrumentation (__tsan_{read,write}N).
611 */
612 if (unlikely(size == 0))
613 return;
614
615 /*
616 * Avoid user_access_save in fast-path: find_watchpoint is safe without
617 * user_access_save, as the address that ptr points to is only used to
618 * check if a watchpoint exists; ptr is never dereferenced.
619 */
620 watchpoint = find_watchpoint((unsigned long)ptr, size, !is_write,
621 &encoded_watchpoint);
622 /*
623 * It is safe to check kcsan_is_enabled() after find_watchpoint in the
624 * slow-path, as long as no state changes that cause a race to be
625 * detected and reported have occurred until kcsan_is_enabled() is
626 * checked.
627 */
628
629 if (unlikely(watchpoint != NULL))
630 kcsan_found_watchpoint(ptr, size, type, watchpoint,
631 encoded_watchpoint);
632 else {
633 struct kcsan_ctx *ctx = get_ctx(); /* Call only once in fast-path. */
634
635 if (unlikely(should_watch(ptr, size, type, ctx)))
636 kcsan_setup_watchpoint(ptr, size, type);
637 else if (unlikely(ctx->scoped_accesses.prev))
638 kcsan_check_scoped_accesses();
639 }
640 }
641
642 /* === Public interface ===================================================== */
643
kcsan_init(void)644 void __init kcsan_init(void)
645 {
646 int cpu;
647
648 BUG_ON(!in_task());
649
650 for_each_possible_cpu(cpu)
651 per_cpu(kcsan_rand_state, cpu) = (u32)get_cycles();
652
653 /*
654 * We are in the init task, and no other tasks should be running;
655 * WRITE_ONCE without memory barrier is sufficient.
656 */
657 if (kcsan_early_enable) {
658 pr_info("enabled early\n");
659 WRITE_ONCE(kcsan_enabled, true);
660 }
661
662 if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) ||
663 IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC) ||
664 IS_ENABLED(CONFIG_KCSAN_PERMISSIVE) ||
665 IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) {
666 pr_warn("non-strict mode configured - use CONFIG_KCSAN_STRICT=y to see all data races\n");
667 } else {
668 pr_info("strict mode configured\n");
669 }
670 }
671
672 /* === Exported interface =================================================== */
673
kcsan_disable_current(void)674 void kcsan_disable_current(void)
675 {
676 ++get_ctx()->disable_count;
677 }
678 EXPORT_SYMBOL(kcsan_disable_current);
679
kcsan_enable_current(void)680 void kcsan_enable_current(void)
681 {
682 if (get_ctx()->disable_count-- == 0) {
683 /*
684 * Warn if kcsan_enable_current() calls are unbalanced with
685 * kcsan_disable_current() calls, which causes disable_count to
686 * become negative and should not happen.
687 */
688 kcsan_disable_current(); /* restore to 0, KCSAN still enabled */
689 kcsan_disable_current(); /* disable to generate warning */
690 WARN(1, "Unbalanced %s()", __func__);
691 kcsan_enable_current();
692 }
693 }
694 EXPORT_SYMBOL(kcsan_enable_current);
695
kcsan_enable_current_nowarn(void)696 void kcsan_enable_current_nowarn(void)
697 {
698 if (get_ctx()->disable_count-- == 0)
699 kcsan_disable_current();
700 }
701 EXPORT_SYMBOL(kcsan_enable_current_nowarn);
702
kcsan_nestable_atomic_begin(void)703 void kcsan_nestable_atomic_begin(void)
704 {
705 /*
706 * Do *not* check and warn if we are in a flat atomic region: nestable
707 * and flat atomic regions are independent from each other.
708 * See include/linux/kcsan.h: struct kcsan_ctx comments for more
709 * comments.
710 */
711
712 ++get_ctx()->atomic_nest_count;
713 }
714 EXPORT_SYMBOL(kcsan_nestable_atomic_begin);
715
kcsan_nestable_atomic_end(void)716 void kcsan_nestable_atomic_end(void)
717 {
718 if (get_ctx()->atomic_nest_count-- == 0) {
719 /*
720 * Warn if kcsan_nestable_atomic_end() calls are unbalanced with
721 * kcsan_nestable_atomic_begin() calls, which causes
722 * atomic_nest_count to become negative and should not happen.
723 */
724 kcsan_nestable_atomic_begin(); /* restore to 0 */
725 kcsan_disable_current(); /* disable to generate warning */
726 WARN(1, "Unbalanced %s()", __func__);
727 kcsan_enable_current();
728 }
729 }
730 EXPORT_SYMBOL(kcsan_nestable_atomic_end);
731
kcsan_flat_atomic_begin(void)732 void kcsan_flat_atomic_begin(void)
733 {
734 get_ctx()->in_flat_atomic = true;
735 }
736 EXPORT_SYMBOL(kcsan_flat_atomic_begin);
737
kcsan_flat_atomic_end(void)738 void kcsan_flat_atomic_end(void)
739 {
740 get_ctx()->in_flat_atomic = false;
741 }
742 EXPORT_SYMBOL(kcsan_flat_atomic_end);
743
kcsan_atomic_next(int n)744 void kcsan_atomic_next(int n)
745 {
746 get_ctx()->atomic_next = n;
747 }
748 EXPORT_SYMBOL(kcsan_atomic_next);
749
kcsan_set_access_mask(unsigned long mask)750 void kcsan_set_access_mask(unsigned long mask)
751 {
752 get_ctx()->access_mask = mask;
753 }
754 EXPORT_SYMBOL(kcsan_set_access_mask);
755
756 struct kcsan_scoped_access *
kcsan_begin_scoped_access(const volatile void * ptr,size_t size,int type,struct kcsan_scoped_access * sa)757 kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type,
758 struct kcsan_scoped_access *sa)
759 {
760 struct kcsan_ctx *ctx = get_ctx();
761
762 __kcsan_check_access(ptr, size, type);
763
764 ctx->disable_count++; /* Disable KCSAN, in case list debugging is on. */
765
766 INIT_LIST_HEAD(&sa->list);
767 sa->ptr = ptr;
768 sa->size = size;
769 sa->type = type;
770
771 if (!ctx->scoped_accesses.prev) /* Lazy initialize list head. */
772 INIT_LIST_HEAD(&ctx->scoped_accesses);
773 list_add(&sa->list, &ctx->scoped_accesses);
774
775 ctx->disable_count--;
776 return sa;
777 }
778 EXPORT_SYMBOL(kcsan_begin_scoped_access);
779
kcsan_end_scoped_access(struct kcsan_scoped_access * sa)780 void kcsan_end_scoped_access(struct kcsan_scoped_access *sa)
781 {
782 struct kcsan_ctx *ctx = get_ctx();
783
784 if (WARN(!ctx->scoped_accesses.prev, "Unbalanced %s()?", __func__))
785 return;
786
787 ctx->disable_count++; /* Disable KCSAN, in case list debugging is on. */
788
789 list_del(&sa->list);
790 if (list_empty(&ctx->scoped_accesses))
791 /*
792 * Ensure we do not enter kcsan_check_scoped_accesses()
793 * slow-path if unnecessary, and avoids requiring list_empty()
794 * in the fast-path (to avoid a READ_ONCE() and potential
795 * uaccess warning).
796 */
797 ctx->scoped_accesses.prev = NULL;
798
799 ctx->disable_count--;
800
801 __kcsan_check_access(sa->ptr, sa->size, sa->type);
802 }
803 EXPORT_SYMBOL(kcsan_end_scoped_access);
804
/*
 * Exported, out-of-line entry point to check_access(): check the access of
 * @size bytes at @ptr, with @type being a mask of KCSAN_ACCESS_* flags.
 */
void __kcsan_check_access(const volatile void *ptr, size_t size, int type)
{
	check_access(ptr, size, type);
}
EXPORT_SYMBOL(__kcsan_check_access);
810
811 /*
812 * KCSAN uses the same instrumentation that is emitted by supported compilers
813 * for ThreadSanitizer (TSAN).
814 *
815 * When enabled, the compiler emits instrumentation calls (the functions
816 * prefixed with "__tsan" below) for all loads and stores that it generated;
817 * inline asm is not instrumented.
818 *
819 * Note that, not all supported compiler versions distinguish aligned/unaligned
820 * accesses, but e.g. recent versions of Clang do. We simply alias the unaligned
821 * version to the generic version, which can handle both.
822 */
823
824 #define DEFINE_TSAN_READ_WRITE(size) \
825 void __tsan_read##size(void *ptr); \
826 void __tsan_read##size(void *ptr) \
827 { \
828 check_access(ptr, size, 0); \
829 } \
830 EXPORT_SYMBOL(__tsan_read##size); \
831 void __tsan_unaligned_read##size(void *ptr) \
832 __alias(__tsan_read##size); \
833 EXPORT_SYMBOL(__tsan_unaligned_read##size); \
834 void __tsan_write##size(void *ptr); \
835 void __tsan_write##size(void *ptr) \
836 { \
837 check_access(ptr, size, KCSAN_ACCESS_WRITE); \
838 } \
839 EXPORT_SYMBOL(__tsan_write##size); \
840 void __tsan_unaligned_write##size(void *ptr) \
841 __alias(__tsan_write##size); \
842 EXPORT_SYMBOL(__tsan_unaligned_write##size); \
843 void __tsan_read_write##size(void *ptr); \
844 void __tsan_read_write##size(void *ptr) \
845 { \
846 check_access(ptr, size, \
847 KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE); \
848 } \
849 EXPORT_SYMBOL(__tsan_read_write##size); \
850 void __tsan_unaligned_read_write##size(void *ptr) \
851 __alias(__tsan_read_write##size); \
852 EXPORT_SYMBOL(__tsan_unaligned_read_write##size)
853
854 DEFINE_TSAN_READ_WRITE(1);
855 DEFINE_TSAN_READ_WRITE(2);
856 DEFINE_TSAN_READ_WRITE(4);
857 DEFINE_TSAN_READ_WRITE(8);
858 DEFINE_TSAN_READ_WRITE(16);
859
/* Instrumentation hook: plain read of @size bytes starting at @ptr. */
void __tsan_read_range(void *ptr, size_t size);
void __tsan_read_range(void *ptr, size_t size)
{
	const int type = 0; /* no KCSAN_ACCESS_* flags: a plain read */

	check_access(ptr, size, type);
}
EXPORT_SYMBOL(__tsan_read_range);
866
/* Instrumentation hook: plain write of @size bytes starting at @ptr. */
void __tsan_write_range(void *ptr, size_t size);
void __tsan_write_range(void *ptr, size_t size)
{
	const int type = KCSAN_ACCESS_WRITE;

	check_access(ptr, size, type);
}
EXPORT_SYMBOL(__tsan_write_range);
873
874 /*
875 * Use of explicit volatile is generally disallowed [1], however, volatile is
876 * still used in various concurrent context, whether in low-level
877 * synchronization primitives or for legacy reasons.
878 * [1] https://lwn.net/Articles/233479/
879 *
880 * We only consider volatile accesses atomic if they are aligned and would pass
881 * the size-check of compiletime_assert_rwonce_type().
882 */
883 #define DEFINE_TSAN_VOLATILE_READ_WRITE(size) \
884 void __tsan_volatile_read##size(void *ptr); \
885 void __tsan_volatile_read##size(void *ptr) \
886 { \
887 const bool is_atomic = size <= sizeof(long long) && \
888 IS_ALIGNED((unsigned long)ptr, size); \
889 if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS) && is_atomic) \
890 return; \
891 check_access(ptr, size, is_atomic ? KCSAN_ACCESS_ATOMIC : 0); \
892 } \
893 EXPORT_SYMBOL(__tsan_volatile_read##size); \
894 void __tsan_unaligned_volatile_read##size(void *ptr) \
895 __alias(__tsan_volatile_read##size); \
896 EXPORT_SYMBOL(__tsan_unaligned_volatile_read##size); \
897 void __tsan_volatile_write##size(void *ptr); \
898 void __tsan_volatile_write##size(void *ptr) \
899 { \
900 const bool is_atomic = size <= sizeof(long long) && \
901 IS_ALIGNED((unsigned long)ptr, size); \
902 if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS) && is_atomic) \
903 return; \
904 check_access(ptr, size, \
905 KCSAN_ACCESS_WRITE | \
906 (is_atomic ? KCSAN_ACCESS_ATOMIC : 0)); \
907 } \
908 EXPORT_SYMBOL(__tsan_volatile_write##size); \
909 void __tsan_unaligned_volatile_write##size(void *ptr) \
910 __alias(__tsan_volatile_write##size); \
911 EXPORT_SYMBOL(__tsan_unaligned_volatile_write##size)
912
913 DEFINE_TSAN_VOLATILE_READ_WRITE(1);
914 DEFINE_TSAN_VOLATILE_READ_WRITE(2);
915 DEFINE_TSAN_VOLATILE_READ_WRITE(4);
916 DEFINE_TSAN_VOLATILE_READ_WRITE(8);
917 DEFINE_TSAN_VOLATILE_READ_WRITE(16);
918
919 /*
920 * The below are not required by KCSAN, but can still be emitted by the
921 * compiler.
922 */
/* Function-entry hook: intentionally empty, @call_pc is ignored. */
void __tsan_func_entry(void *call_pc);
void __tsan_func_entry(void *call_pc) { }
EXPORT_SYMBOL(__tsan_func_entry);
/* Function-exit hook: intentionally empty. */
void __tsan_func_exit(void);
void __tsan_func_exit(void) { }
EXPORT_SYMBOL(__tsan_func_exit);
/* Initialization hook: intentionally empty. */
void __tsan_init(void);
void __tsan_init(void) { }
EXPORT_SYMBOL(__tsan_init);
938
939 /*
940 * Instrumentation for atomic builtins (__atomic_*, __sync_*).
941 *
942 * Normal kernel code _should not_ be using them directly, but some
943 * architectures may implement some or all atomics using the compilers'
944 * builtins.
945 *
946 * Note: If an architecture decides to fully implement atomics using the
947 * builtins, because they are implicitly instrumented by KCSAN (and KASAN,
948 * etc.), implementing the ARCH_ATOMIC interface (to get instrumentation via
949 * atomic-instrumented) is no longer necessary.
950 *
951 * TSAN instrumentation replaces atomic accesses with calls to any of the below
952 * functions, whose job is to also execute the operation itself.
953 */
954
/*
 * Generate __tsan_atomic<bits>_load() and __tsan_atomic<bits>_store().
 *
 * Unless CONFIG_KCSAN_IGNORE_ATOMICS is enabled, the access is first passed to
 * check_access() as an atomic read (load) or an atomic write (store) covering
 * the size of the u<bits> operand. The operation itself is always carried out
 * afterwards by the matching __atomic_*_n() builtin, using the memory order
 * supplied by the caller.
 */
#define DEFINE_TSAN_ATOMIC_LOAD_STORE(bits) \
	u##bits __tsan_atomic##bits##_load(const u##bits *ptr, int memorder); \
	u##bits __tsan_atomic##bits##_load(const u##bits *ptr, int memorder) \
	{ \
		if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) \
			check_access(ptr, sizeof(*ptr), KCSAN_ACCESS_ATOMIC); \
		return __atomic_load_n(ptr, memorder); \
	} \
	EXPORT_SYMBOL(__tsan_atomic##bits##_load); \
	void __tsan_atomic##bits##_store(u##bits *ptr, u##bits v, int memorder); \
	void __tsan_atomic##bits##_store(u##bits *ptr, u##bits v, int memorder) \
	{ \
		const int type = KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC; \
		\
		if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) \
			check_access(ptr, sizeof(*ptr), type); \
		__atomic_store_n(ptr, v, memorder); \
	} \
	EXPORT_SYMBOL(__tsan_atomic##bits##_store)
975
/*
 * Generate __tsan_atomic<bits>_<op>() for a read-modify-write operation.
 *
 * @op:     builtin operation name, pasted into __atomic_<op><suffix>()
 * @bits:   operand width in bits; selects the u<bits> operand type
 * @suffix: trailing part of the builtin name (may be empty)
 *
 * Unless CONFIG_KCSAN_IGNORE_ATOMICS is enabled, the access is passed to
 * check_access() as an atomic compound write before the builtin is executed.
 * The builtin's result is returned to the caller.
 */
#define DEFINE_TSAN_ATOMIC_RMW(op, bits, suffix) \
	u##bits __tsan_atomic##bits##_##op(u##bits *ptr, u##bits v, int memorder); \
	u##bits __tsan_atomic##bits##_##op(u##bits *ptr, u##bits v, int memorder) \
	{ \
		const int type = KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | \
				 KCSAN_ACCESS_ATOMIC; \
		\
		if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) \
			check_access(ptr, sizeof(*ptr), type); \
		return __atomic_##op##suffix(ptr, v, memorder); \
	} \
	EXPORT_SYMBOL(__tsan_atomic##bits##_##op)
988
/*
 * Generate __tsan_atomic<bits>_compare_exchange_<strength>().
 *
 * @bits:     operand width in bits; selects the u<bits> operand type
 * @strength: name suffix of the generated function
 * @weak:     value forwarded as the 'weak' argument of
 *            __atomic_compare_exchange_n()
 *
 * The generated function returns the result of the builtin.
 *
 * Note: CAS operations are always classified as write, even in case they
 * fail. We cannot perform check_access() after a write, as it might lead to
 * false positives, in cases such as:
 *
 *	T0: __atomic_compare_exchange_n(&p->flag, &old, 1, ...)
 *
 *	T1: if (__atomic_load_n(&p->flag, ...)) {
 *		modify *p;
 *		p->flag = 0;
 *	    }
 *
 * The only downside is that, if there are 3 threads, with one CAS that
 * succeeds, another CAS that fails, and an unmarked racing operation, we may
 * point at the wrong CAS as the source of the race. However, if we assume that
 * all CAS can succeed in some other execution, the data race is still valid.
 */
#define DEFINE_TSAN_ATOMIC_CMPXCHG(bits, strength, weak) \
	int __tsan_atomic##bits##_compare_exchange_##strength(u##bits *ptr, u##bits *exp, \
							      u##bits val, int mo, int fail_mo); \
	int __tsan_atomic##bits##_compare_exchange_##strength(u##bits *ptr, u##bits *exp, \
							      u##bits val, int mo, int fail_mo) \
	{ \
		const int type = KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | \
				 KCSAN_ACCESS_ATOMIC; \
		\
		if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) \
			check_access(ptr, sizeof(*ptr), type); \
		return __atomic_compare_exchange_n(ptr, exp, val, weak, mo, fail_mo); \
	} \
	EXPORT_SYMBOL(__tsan_atomic##bits##_compare_exchange_##strength)
1020
/*
 * Generate __tsan_atomic<bits>_compare_exchange_val(), the value-returning
 * compare-exchange variant.
 *
 * The expected value is taken by value and handed to the builtin by address,
 * with the 'weak' argument fixed to 0. The function returns the local copy of
 * the expected value after the builtin has run, i.e. including any update the
 * builtin made to it. As with the other CAS hooks, the access is classified
 * as an atomic compound write and checked before the operation, unless
 * CONFIG_KCSAN_IGNORE_ATOMICS is enabled.
 */
#define DEFINE_TSAN_ATOMIC_CMPXCHG_VAL(bits) \
	u##bits __tsan_atomic##bits##_compare_exchange_val(u##bits *ptr, u##bits exp, u##bits val, \
							   int mo, int fail_mo); \
	u##bits __tsan_atomic##bits##_compare_exchange_val(u##bits *ptr, u##bits exp, u##bits val, \
							   int mo, int fail_mo) \
	{ \
		const int type = KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | \
				 KCSAN_ACCESS_ATOMIC; \
		\
		if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) \
			check_access(ptr, sizeof(*ptr), type); \
		__atomic_compare_exchange_n(ptr, &exp, val, 0, mo, fail_mo); \
		return exp; \
	} \
	EXPORT_SYMBOL(__tsan_atomic##bits##_compare_exchange_val)
1036
1037 #define DEFINE_TSAN_ATOMIC_OPS(bits) \
1038 DEFINE_TSAN_ATOMIC_LOAD_STORE(bits); \
1039 DEFINE_TSAN_ATOMIC_RMW(exchange, bits, _n); \
1040 DEFINE_TSAN_ATOMIC_RMW(fetch_add, bits, ); \
1041 DEFINE_TSAN_ATOMIC_RMW(fetch_sub, bits, ); \
1042 DEFINE_TSAN_ATOMIC_RMW(fetch_and, bits, ); \
1043 DEFINE_TSAN_ATOMIC_RMW(fetch_or, bits, ); \
1044 DEFINE_TSAN_ATOMIC_RMW(fetch_xor, bits, ); \
1045 DEFINE_TSAN_ATOMIC_RMW(fetch_nand, bits, ); \
1046 DEFINE_TSAN_ATOMIC_CMPXCHG(bits, strong, 0); \
1047 DEFINE_TSAN_ATOMIC_CMPXCHG(bits, weak, 1); \
1048 DEFINE_TSAN_ATOMIC_CMPXCHG_VAL(bits)
1049
1050 DEFINE_TSAN_ATOMIC_OPS(8);
1051 DEFINE_TSAN_ATOMIC_OPS(16);
1052 DEFINE_TSAN_ATOMIC_OPS(32);
1053 #ifdef CONFIG_64BIT
1054 DEFINE_TSAN_ATOMIC_OPS(64);
1055 #endif
1056
/*
 * Thread-fence hook: performs no race checking and simply issues the fence
 * via the __atomic_thread_fence() builtin with the requested @memorder.
 */
void __tsan_atomic_thread_fence(int memorder);
void __tsan_atomic_thread_fence(int memorder)
{
	__atomic_thread_fence(memorder);
}
EXPORT_SYMBOL(__tsan_atomic_thread_fence);
1063
/* Signal-fence hook: intentionally empty, @memorder is ignored. */
void __tsan_atomic_signal_fence(int memorder);
void __tsan_atomic_signal_fence(int memorder)
{
}
EXPORT_SYMBOL(__tsan_atomic_signal_fence);
1067
1068 #ifdef __HAVE_ARCH_MEMSET
1069 void *__tsan_memset(void *s, int c, size_t count);
__tsan_memset(void * s,int c,size_t count)1070 noinline void *__tsan_memset(void *s, int c, size_t count)
1071 {
1072 /*
1073 * Instead of not setting up watchpoints where accessed size is greater
1074 * than MAX_ENCODABLE_SIZE, truncate checked size to MAX_ENCODABLE_SIZE.
1075 */
1076 size_t check_len = min_t(size_t, count, MAX_ENCODABLE_SIZE);
1077
1078 check_access(s, check_len, KCSAN_ACCESS_WRITE);
1079 return memset(s, c, count);
1080 }
1081 #else
1082 void *__tsan_memset(void *s, int c, size_t count) __alias(memset);
1083 #endif
1084 EXPORT_SYMBOL(__tsan_memset);
1085
1086 #ifdef __HAVE_ARCH_MEMMOVE
1087 void *__tsan_memmove(void *dst, const void *src, size_t len);
__tsan_memmove(void * dst,const void * src,size_t len)1088 noinline void *__tsan_memmove(void *dst, const void *src, size_t len)
1089 {
1090 size_t check_len = min_t(size_t, len, MAX_ENCODABLE_SIZE);
1091
1092 check_access(dst, check_len, KCSAN_ACCESS_WRITE);
1093 check_access(src, check_len, 0);
1094 return memmove(dst, src, len);
1095 }
1096 #else
1097 void *__tsan_memmove(void *dst, const void *src, size_t len) __alias(memmove);
1098 #endif
1099 EXPORT_SYMBOL(__tsan_memmove);
1100
1101 #ifdef __HAVE_ARCH_MEMCPY
1102 void *__tsan_memcpy(void *dst, const void *src, size_t len);
__tsan_memcpy(void * dst,const void * src,size_t len)1103 noinline void *__tsan_memcpy(void *dst, const void *src, size_t len)
1104 {
1105 size_t check_len = min_t(size_t, len, MAX_ENCODABLE_SIZE);
1106
1107 check_access(dst, check_len, KCSAN_ACCESS_WRITE);
1108 check_access(src, check_len, 0);
1109 return memcpy(dst, src, len);
1110 }
1111 #else
1112 void *__tsan_memcpy(void *dst, const void *src, size_t len) __alias(memcpy);
1113 #endif
1114 EXPORT_SYMBOL(__tsan_memcpy);
1115