/**
 * kmemcheck - a heavyweight memory checker for the linux kernel
 * Copyright (C) 2007, 2008  Vegard Nossum <vegardno@ifi.uio.no>
 * (With a lot of help from Ingo Molnar and Pekka Enberg.)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2) as
 * published by the Free Software Foundation.
 */

#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/kmemcheck.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/page-flags.h>
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/types.h>

#include <asm/cacheflush.h>
#include <asm/kmemcheck.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

#include "error.h"
#include "opcode.h"
#include "pte.h"
#include "selftest.h"
#include "shadow.h"


#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
#  define KMEMCHECK_ENABLED 0
#endif

#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
#  define KMEMCHECK_ENABLED 1
#endif

#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
#  define KMEMCHECK_ENABLED 2
#endif

int kmemcheck_enabled = KMEMCHECK_ENABLED;

int __init kmemcheck_init(void)
{
#ifdef CONFIG_SMP
	/*
	 * Limit SMP to use a single CPU. We rely on the fact that this code
	 * runs before SMP is set up.
	 */
	if (setup_max_cpus > 1) {
		printk(KERN_INFO
			"kmemcheck: Limiting number of CPUs to 1.\n");
		setup_max_cpus = 1;
	}
#endif

	if (!kmemcheck_selftest()) {
		printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n");
		kmemcheck_enabled = 0;
		return -EINVAL;
	}

	printk(KERN_INFO "kmemcheck: Initialized\n");
	return 0;
}

early_initcall(kmemcheck_init);

/*
 * We need to parse the kmemcheck= option before any memory is allocated.
 */
static int __init param_kmemcheck(char *str)
{
	int val;
	int ret;

	if (!str)
		return -EINVAL;

	ret = kstrtoint(str, 0, &val);
	if (ret)
		return ret;
	kmemcheck_enabled = val;
	return 0;
}

early_param("kmemcheck", param_kmemcheck);

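/*
 * Temporarily mark the page containing @address as present again so that
 * the faulting instruction can access it. Returns 1 if the address is
 * tracked by kmemcheck, 0 otherwise.
 */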
int kmemcheck_show_addr(unsigned long address)
{
	pte_t *pte;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return 0;

	set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
	__flush_tlb_one(address);
	return 1;
}

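/*
 * Clear the present bit again so that the next access to the page faults
 * back into kmemcheck. Returns 1 if the address is tracked, 0 otherwise.
 */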
int kmemcheck_hide_addr(unsigned long address)
{
	pte_t *pte;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return 0;

	set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
	__flush_tlb_one(address);
	return 1;
}

struct kmemcheck_context {
	bool busy;
	int balance;

	/*
	 * There can be at most two memory operands to an instruction, but
	 * each address can cross a page boundary -- so we may need up to
	 * four addresses that must be hidden/revealed for each fault.
	 */
	unsigned long addr[4];
	unsigned long n_addrs;
	unsigned long flags;

	/* Data size of the instruction that caused a fault. */
	unsigned int size;
};

static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context);

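/*
 * True while we are in the middle of single-stepping a faulting
 * instruction, i.e. between kmemcheck_show() and kmemcheck_hide().
 */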
bool kmemcheck_active(struct pt_regs *regs)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);

	return data->balance > 0;
}

/* Save an address that needs to be shown/hidden */
static void kmemcheck_save_addr(unsigned long addr)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);

	BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr));
	data->addr[data->n_addrs++] = addr;
}

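/*
 * Show every saved address. Returns the number of addresses that actually
 * belonged to kmemcheck-tracked pages.
 */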
static unsigned int kmemcheck_show_all(void)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
	unsigned int i;
	unsigned int n;

	n = 0;
	for (i = 0; i < data->n_addrs; ++i)
		n += kmemcheck_show_addr(data->addr[i]);

	return n;
}

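/* Hide every saved address again; the counterpart of kmemcheck_show_all(). */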
static unsigned int kmemcheck_hide_all(void)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
	unsigned int i;
	unsigned int n;

	n = 0;
	for (i = 0; i < data->n_addrs; ++i)
		n += kmemcheck_hide_addr(data->addr[i]);

	return n;
}

/*
 * Called from the #PF handler.
 */
void kmemcheck_show(struct pt_regs *regs)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);

	BUG_ON(!irqs_disabled());

	if (unlikely(data->balance != 0)) {
		kmemcheck_show_all();
		kmemcheck_error_save_bug(regs);
		data->balance = 0;
		return;
	}

	/*
	 * None of the addresses actually belonged to kmemcheck. Note that
	 * this is not an error.
	 */
	if (kmemcheck_show_all() == 0)
		return;

	++data->balance;

	/*
	 * The IF needs to be cleared as well, so that the faulting
	 * instruction can run "uninterrupted". Otherwise, we might take
	 * an interrupt and start executing that before we've had a chance
	 * to hide the page again.
	 *
	 * NOTE: In the rare case of multiple faults, we must not override
	 * the original flags:
	 */
	if (!(regs->flags & X86_EFLAGS_TF))
		data->flags = regs->flags;

	regs->flags |= X86_EFLAGS_TF;
	regs->flags &= ~X86_EFLAGS_IF;
}

/*
 * Called from the #DB handler.
 */
void kmemcheck_hide(struct pt_regs *regs)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
	int n;

	BUG_ON(!irqs_disabled());

	if (unlikely(data->balance != 1)) {
		kmemcheck_show_all();
		kmemcheck_error_save_bug(regs);
		data->n_addrs = 0;
		data->balance = 0;

		if (!(data->flags & X86_EFLAGS_TF))
			regs->flags &= ~X86_EFLAGS_TF;
		if (data->flags & X86_EFLAGS_IF)
			regs->flags |= X86_EFLAGS_IF;
		return;
	}

	if (kmemcheck_enabled)
		n = kmemcheck_hide_all();
	else
		n = kmemcheck_show_all();

	if (n == 0)
		return;

	--data->balance;

	data->n_addrs = 0;

	if (!(data->flags & X86_EFLAGS_TF))
		regs->flags &= ~X86_EFLAGS_TF;
	if (data->flags & X86_EFLAGS_IF)
		regs->flags |= X86_EFLAGS_IF;
}

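/*
 * Make @n pages starting at @p accessible again: set the present bit and
 * clear the kmemcheck "hidden" bit on each page's PTE.
 */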
void kmemcheck_show_pages(struct page *p, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; ++i) {
		unsigned long address;
		pte_t *pte;
		unsigned int level;

		address = (unsigned long) page_address(&p[i]);
		pte = lookup_address(address, &level);
		BUG_ON(!pte);
		BUG_ON(level != PG_LEVEL_4K);

		set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN));
		__flush_tlb_one(address);
	}
}

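/* Returns true if the page's PTE is known to kmemcheck. */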
bool kmemcheck_page_is_tracked(struct page *p)
{
	/* This will also check the "hidden" flag of the PTE. */
	return kmemcheck_pte_lookup((unsigned long) page_address(p));
}

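/*
 * Hide @n pages starting at @p: clear the present bit and set the "hidden"
 * bit so that any access to them faults into kmemcheck.
 */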
void kmemcheck_hide_pages(struct page *p, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; ++i) {
		unsigned long address;
		pte_t *pte;
		unsigned int level;

		address = (unsigned long) page_address(&p[i]);
		pte = lookup_address(address, &level);
		BUG_ON(!pte);
		BUG_ON(level != PG_LEVEL_4K);

		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
		set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN));
		__flush_tlb_one(address);
	}
}

/* Access may NOT cross page boundary */
static void kmemcheck_read_strict(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	void *shadow;
	enum kmemcheck_shadow status;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return;

	kmemcheck_save_addr(addr);
	status = kmemcheck_shadow_test(shadow, size);
	if (status == KMEMCHECK_SHADOW_INITIALIZED)
		return;

	if (kmemcheck_enabled)
		kmemcheck_error_save(status, addr, size, regs);

	/* In one-shot mode (enabled == 2), stop checking after the first error. */
	if (kmemcheck_enabled == 2)
		kmemcheck_enabled = 0;

	/* Don't warn about it again. */
	kmemcheck_shadow_set(shadow, size);
}

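/*
 * Returns true if every byte of the object is initialized, or if the
 * address is not tracked by kmemcheck at all.
 */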
bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
{
	enum kmemcheck_shadow status;
	void *shadow;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return true;

	status = kmemcheck_shadow_test_all(shadow, size);

	return status == KMEMCHECK_SHADOW_INITIALIZED;
}

/* Access may cross page boundary */
static void kmemcheck_read(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	unsigned long page = addr & PAGE_MASK;
	unsigned long next_addr = addr + size - 1;
	unsigned long next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		kmemcheck_read_strict(regs, addr, size);
		return;
	}

	/*
	 * What we do is basically to split the access across the
	 * two pages and handle each part separately. Yes, this means
	 * that we may now see reads that are 3 + 5 bytes, for
	 * example (and if both are uninitialized, there will be two
	 * reports), but it makes the code a lot simpler.
	 */
	kmemcheck_read_strict(regs, addr, next_page - addr);
	kmemcheck_read_strict(regs, next_page, next_addr - next_page);
}

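/*
 * Mark the shadow bytes for this write as initialized; the access may NOT
 * cross a page boundary.
 */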
static void kmemcheck_write_strict(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	void *shadow;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return;

	kmemcheck_save_addr(addr);
	kmemcheck_shadow_set(shadow, size);
}

static void kmemcheck_write(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	unsigned long page = addr & PAGE_MASK;
	unsigned long next_addr = addr + size - 1;
	unsigned long next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		kmemcheck_write_strict(regs, addr, size);
		return;
	}

	/* See comment in kmemcheck_read(). */
	kmemcheck_write_strict(regs, addr, next_page - addr);
	kmemcheck_write_strict(regs, next_page, next_addr - next_page);
}

/*
 * Copying is hard. We have two addresses, each of which may be split across
 * a page (and each page will have different shadow addresses).
 */
static void kmemcheck_copy(struct pt_regs *regs,
	unsigned long src_addr, unsigned long dst_addr, unsigned int size)
{
	uint8_t shadow[8];
	enum kmemcheck_shadow status;

	unsigned long page;
	unsigned long next_addr;
	unsigned long next_page;

	uint8_t *x;
	unsigned int i;
	unsigned int n;

	BUG_ON(size > sizeof(shadow));

	page = src_addr & PAGE_MASK;
	next_addr = src_addr + size - 1;
	next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		/* Same page */
		x = kmemcheck_shadow_lookup(src_addr);
		if (x) {
			kmemcheck_save_addr(src_addr);
			for (i = 0; i < size; ++i)
				shadow[i] = x[i];
		} else {
			for (i = 0; i < size; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}
	} else {
		n = next_page - src_addr;
		BUG_ON(n > sizeof(shadow));

		/* First page */
		x = kmemcheck_shadow_lookup(src_addr);
		if (x) {
			kmemcheck_save_addr(src_addr);
			for (i = 0; i < n; ++i)
				shadow[i] = x[i];
		} else {
			/* Not tracked */
			for (i = 0; i < n; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}

		/* Second page */
		x = kmemcheck_shadow_lookup(next_page);
		if (x) {
			kmemcheck_save_addr(next_page);
			for (i = n; i < size; ++i)
				shadow[i] = x[i - n];
		} else {
			/* Not tracked */
			for (i = n; i < size; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}
	}

	page = dst_addr & PAGE_MASK;
	next_addr = dst_addr + size - 1;
	next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		/* Same page */
		x = kmemcheck_shadow_lookup(dst_addr);
		if (x) {
			kmemcheck_save_addr(dst_addr);
			for (i = 0; i < size; ++i) {
				x[i] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}
	} else {
		n = next_page - dst_addr;
		BUG_ON(n > sizeof(shadow));

		/* First page */
		x = kmemcheck_shadow_lookup(dst_addr);
		if (x) {
			kmemcheck_save_addr(dst_addr);
			for (i = 0; i < n; ++i) {
				x[i] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}

		/* Second page */
		x = kmemcheck_shadow_lookup(next_page);
		if (x) {
			kmemcheck_save_addr(next_page);
			for (i = n; i < size; ++i) {
				x[i - n] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}
	}

	status = kmemcheck_shadow_test(shadow, size);
	if (status == KMEMCHECK_SHADOW_INITIALIZED)
		return;

	if (kmemcheck_enabled)
		kmemcheck_error_save(status, src_addr, size, regs);

	if (kmemcheck_enabled == 2)
		kmemcheck_enabled = 0;
}

enum kmemcheck_method {
	KMEMCHECK_READ,
	KMEMCHECK_WRITE,
};

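/*
 * Decode the instruction at regs->ip and dispatch to the read/write/copy
 * handlers. If the opcode needs no special handling, fall back to the
 * address and access type reported by the page fault.
 */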
static void kmemcheck_access(struct pt_regs *regs,
	unsigned long fallback_address, enum kmemcheck_method fallback_method)
{
	const uint8_t *insn;
	const uint8_t *insn_primary;
	unsigned int size;

	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);

	/* Recursive fault -- ouch. */
	if (data->busy) {
		kmemcheck_show_addr(fallback_address);
		kmemcheck_error_save_bug(regs);
		return;
	}

	data->busy = true;

	insn = (const uint8_t *) regs->ip;
	insn_primary = kmemcheck_opcode_get_primary(insn);

	kmemcheck_opcode_decode(insn, &size);

	switch (insn_primary[0]) {
#ifdef CONFIG_KMEMCHECK_BITOPS_OK
		/* AND, OR, XOR */
		/*
		 * Unfortunately, these instructions have to be excluded from
		 * our regular checking since they access only some (and not
		 * all) bits. This clears out "bogus" bitfield-access warnings.
		 */
	case 0x80:
	case 0x81:
	case 0x82:
	case 0x83:
		switch ((insn_primary[1] >> 3) & 7) {
			/* OR */
		case 1:
			/* AND */
		case 4:
			/* XOR */
		case 6:
			kmemcheck_write(regs, fallback_address, size);
			goto out;

			/* ADD */
		case 0:
			/* ADC */
		case 2:
			/* SBB */
		case 3:
			/* SUB */
		case 5:
			/* CMP */
		case 7:
			break;
		}
		break;
#endif

		/* MOVS, MOVSB, MOVSW, MOVSD */
	case 0xa4:
	case 0xa5:
		/*
		 * These instructions are special because they take two
		 * addresses, but we only get one page fault.
		 */
		kmemcheck_copy(regs, regs->si, regs->di, size);
		goto out;

		/* CMPS, CMPSB, CMPSW, CMPSD */
	case 0xa6:
	case 0xa7:
		kmemcheck_read(regs, regs->si, size);
		kmemcheck_read(regs, regs->di, size);
		goto out;
	}

	/*
	 * If the opcode isn't special in any way, we use the data from the
	 * page fault handler to determine the address and type of memory
	 * access.
	 */
	switch (fallback_method) {
	case KMEMCHECK_READ:
		kmemcheck_read(regs, fallback_address, size);
		goto out;
	case KMEMCHECK_WRITE:
		kmemcheck_write(regs, fallback_address, size);
		goto out;
	}

out:
	data->busy = false;
}

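/*
 * Entry point from the page fault handler. Returns true if the fault was
 * caused by an access to a kmemcheck-tracked page and has been handled.
 */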
bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
	unsigned long error_code)
{
	pte_t *pte;

	/*
	 * XXX: Is it safe to assume that memory accesses from virtual 86
	 * mode or non-kernel code segments will _never_ access kernel
	 * memory (e.g. tracked pages)? For now, we need this to avoid
	 * invoking kmemcheck for PnP BIOS calls.
	 */
	if (regs->flags & X86_VM_MASK)
		return false;
	if (regs->cs != __KERNEL_CS)
		return false;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return false;

	WARN_ON_ONCE(in_nmi());

	if (error_code & 2)
		kmemcheck_access(regs, address, KMEMCHECK_WRITE);
	else
		kmemcheck_access(regs, address, KMEMCHECK_READ);

	kmemcheck_show(regs);
	return true;
}

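/*
 * Entry point from the debug (#DB) trap handler, after the faulting
 * instruction has been single-stepped. Returns true if the trap belonged
 * to kmemcheck.
 */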
bool kmemcheck_trap(struct pt_regs *regs)
{
	if (!kmemcheck_active(regs))
		return false;

	/* We're done. */
	kmemcheck_hide(regs);
	return true;
}