/**
 * kmemcheck - a heavyweight memory checker for the linux kernel
 * Copyright (C) 2007, 2008  Vegard Nossum <vegardno@ifi.uio.no>
 * (With a lot of help from Ingo Molnar and Pekka Enberg.)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2) as
 * published by the Free Software Foundation.
 */

#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/kmemcheck.h>
#include <linux/mm.h>
#include <linux/page-flags.h>
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/types.h>

#include <asm/cacheflush.h>
#include <asm/kmemcheck.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

#include "error.h"
#include "opcode.h"
#include "pte.h"
#include "selftest.h"
#include "shadow.h"


#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
#  define KMEMCHECK_ENABLED 0
#endif

#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
#  define KMEMCHECK_ENABLED 1
#endif

#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
#  define KMEMCHECK_ENABLED 2
#endif

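/*
 * Runtime mode: 0 = disabled, 1 = enabled, 2 = one-shot. In one-shot mode
 * kmemcheck disables itself again after the first error report (see
 * kmemcheck_read_strict() and kmemcheck_copy() below).
 */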
int kmemcheck_enabled = KMEMCHECK_ENABLED;

int __init kmemcheck_init(void)
{
#ifdef CONFIG_SMP
	/*
	 * Limit SMP to use a single CPU. We rely on the fact that this code
	 * runs before SMP is set up.
	 */
	if (setup_max_cpus > 1) {
		printk(KERN_INFO
			"kmemcheck: Limiting number of CPUs to 1.\n");
		setup_max_cpus = 1;
	}
#endif

	if (!kmemcheck_selftest()) {
		printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n");
		kmemcheck_enabled = 0;
		return -EINVAL;
	}

	printk(KERN_INFO "kmemcheck: Initialized\n");
	return 0;
}

early_initcall(kmemcheck_init);

/*
 * We need to parse the kmemcheck= option before any memory is allocated.
 */
static int __init param_kmemcheck(char *str)
{
	int val;
	int ret;

	if (!str)
		return -EINVAL;

	ret = kstrtoint(str, 0, &val);
	if (ret)
		return ret;
	kmemcheck_enabled = val;
	return 0;
}

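/*
 * This allows a boot-time override of the build-time default, e.g.
 * "kmemcheck=0", "kmemcheck=1" or "kmemcheck=2" on the kernel command line.
 */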
early_param("kmemcheck", param_kmemcheck);

int kmemcheck_show_addr(unsigned long address)
{
	pte_t *pte;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return 0;

	set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
	__flush_tlb_one(address);
	return 1;
}

int kmemcheck_hide_addr(unsigned long address)
{
	pte_t *pte;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return 0;

	set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
	__flush_tlb_one(address);
	return 1;
}
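
/*
 * Tracked pages are normally kept non-present. An access to one raises a
 * page fault; kmemcheck_fault() below decodes the access, then
 * kmemcheck_show() makes the page present again and sets the TF flag so
 * that the faulting instruction is single-stepped. The resulting debug
 * trap lands in kmemcheck_hide(), which hides the page once more.
 */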

struct kmemcheck_context {
	bool busy;
	int balance;

	/*
	 * There can be at most two memory operands to an instruction, but
	 * each address can cross a page boundary -- so we may need up to
	 * four addresses that must be hidden/revealed for each fault.
	 */
	unsigned long addr[4];
	unsigned long n_addrs;
	unsigned long flags;

	/* Data size of the instruction that caused a fault. */
	unsigned int size;
};

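/* State for the fault currently being handled on this CPU. */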
static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context);

bool kmemcheck_active(struct pt_regs *regs)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);

	return data->balance > 0;
}

/* Save an address that needs to be shown/hidden */
static void kmemcheck_save_addr(unsigned long addr)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);

	BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr));
	data->addr[data->n_addrs++] = addr;
}

static unsigned int kmemcheck_show_all(void)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
	unsigned int i;
	unsigned int n;

	n = 0;
	for (i = 0; i < data->n_addrs; ++i)
		n += kmemcheck_show_addr(data->addr[i]);

	return n;
}

static unsigned int kmemcheck_hide_all(void)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
	unsigned int i;
	unsigned int n;

	n = 0;
	for (i = 0; i < data->n_addrs; ++i)
		n += kmemcheck_hide_addr(data->addr[i]);

	return n;
}

/*
 * Called from the #PF handler.
 */
void kmemcheck_show(struct pt_regs *regs)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);

	BUG_ON(!irqs_disabled());

	if (unlikely(data->balance != 0)) {
		kmemcheck_show_all();
		kmemcheck_error_save_bug(regs);
		data->balance = 0;
		return;
	}

	/*
	 * None of the addresses actually belonged to kmemcheck. Note that
	 * this is not an error.
	 */
	if (kmemcheck_show_all() == 0)
		return;

	++data->balance;

	/*
	 * The IF needs to be cleared as well, so that the faulting
	 * instruction can run "uninterrupted". Otherwise, we might take
	 * an interrupt and start executing that before we've had a chance
	 * to hide the page again.
	 *
	 * NOTE: In the rare case of multiple faults, we must not override
	 * the original flags:
	 */
	if (!(regs->flags & X86_EFLAGS_TF))
		data->flags = regs->flags;

	regs->flags |= X86_EFLAGS_TF;
	regs->flags &= ~X86_EFLAGS_IF;
}

/*
 * Called from the #DB handler.
 */
void kmemcheck_hide(struct pt_regs *regs)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
	int n;

	BUG_ON(!irqs_disabled());

	if (unlikely(data->balance != 1)) {
		kmemcheck_show_all();
		kmemcheck_error_save_bug(regs);
		data->n_addrs = 0;
		data->balance = 0;

		if (!(data->flags & X86_EFLAGS_TF))
			regs->flags &= ~X86_EFLAGS_TF;
		if (data->flags & X86_EFLAGS_IF)
			regs->flags |= X86_EFLAGS_IF;
		return;
	}

	if (kmemcheck_enabled)
		n = kmemcheck_hide_all();
	else
		n = kmemcheck_show_all();

	if (n == 0)
		return;

	--data->balance;

	data->n_addrs = 0;

	if (!(data->flags & X86_EFLAGS_TF))
		regs->flags &= ~X86_EFLAGS_TF;
	if (data->flags & X86_EFLAGS_IF)
		regs->flags |= X86_EFLAGS_IF;
}

void kmemcheck_show_pages(struct page *p, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; ++i) {
		unsigned long address;
		pte_t *pte;
		unsigned int level;

		address = (unsigned long) page_address(&p[i]);
		pte = lookup_address(address, &level);
		BUG_ON(!pte);
		BUG_ON(level != PG_LEVEL_4K);

		set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN));
		__flush_tlb_one(address);
	}
}

bool kmemcheck_page_is_tracked(struct page *p)
{
	/* This will also check the "hidden" flag of the PTE. */
	return kmemcheck_pte_lookup((unsigned long) page_address(p));
}

void kmemcheck_hide_pages(struct page *p, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; ++i) {
		unsigned long address;
		pte_t *pte;
		unsigned int level;

		address = (unsigned long) page_address(&p[i]);
		pte = lookup_address(address, &level);
		BUG_ON(!pte);
		BUG_ON(level != PG_LEVEL_4K);

		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
		set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN));
		__flush_tlb_one(address);
	}
}

/* Access may NOT cross page boundary */
static void kmemcheck_read_strict(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	void *shadow;
	enum kmemcheck_shadow status;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return;

	kmemcheck_save_addr(addr);
	status = kmemcheck_shadow_test(shadow, size);
	if (status == KMEMCHECK_SHADOW_INITIALIZED)
		return;

	if (kmemcheck_enabled)
		kmemcheck_error_save(status, addr, size, regs);

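	/* One-shot mode: disable kmemcheck after the first report. */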
	if (kmemcheck_enabled == 2)
		kmemcheck_enabled = 0;

	/* Don't warn about it again. */
	kmemcheck_shadow_set(shadow, size);
}

bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
{
	enum kmemcheck_shadow status;
	void *shadow;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return true;

	status = kmemcheck_shadow_test_all(shadow, size);

	return status == KMEMCHECK_SHADOW_INITIALIZED;
}

/* Access may cross page boundary */
static void kmemcheck_read(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	unsigned long page = addr & PAGE_MASK;
	unsigned long next_addr = addr + size - 1;
	unsigned long next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		kmemcheck_read_strict(regs, addr, size);
		return;
	}

	/*
	 * What we do is basically to split the access across the
	 * two pages and handle each part separately. Yes, this means
	 * that we may now see reads that are 3 + 5 bytes, for
	 * example (and if both are uninitialized, there will be two
	 * reports), but it makes the code a lot simpler.
	 */
	kmemcheck_read_strict(regs, addr, next_page - addr);
	kmemcheck_read_strict(regs, next_page, next_addr - next_page);
}

static void kmemcheck_write_strict(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	void *shadow;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return;

	kmemcheck_save_addr(addr);
	kmemcheck_shadow_set(shadow, size);
}

static void kmemcheck_write(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	unsigned long page = addr & PAGE_MASK;
	unsigned long next_addr = addr + size - 1;
	unsigned long next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		kmemcheck_write_strict(regs, addr, size);
		return;
	}

	/* See comment in kmemcheck_read(). */
	kmemcheck_write_strict(regs, addr, next_page - addr);
	kmemcheck_write_strict(regs, next_page, next_addr - next_page);
}

/*
 * Copying is hard. We have two addresses, each of which may be split across
 * a page (and each page will have different shadow addresses).
 */
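/*
 * The approach: read the source's shadow bytes into a local buffer
 * (untracked source pages count as initialized), then propagate them to the
 * destination's shadow. Bytes whose shadow could be propagated are marked
 * initialized in the local buffer, so the final check only reports
 * uninitialized data that escaped to untracked memory.
 */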
static void kmemcheck_copy(struct pt_regs *regs,
	unsigned long src_addr, unsigned long dst_addr, unsigned int size)
{
	uint8_t shadow[8];
	enum kmemcheck_shadow status;

	unsigned long page;
	unsigned long next_addr;
	unsigned long next_page;

	uint8_t *x;
	unsigned int i;
	unsigned int n;

	BUG_ON(size > sizeof(shadow));

	page = src_addr & PAGE_MASK;
	next_addr = src_addr + size - 1;
	next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		/* Same page */
		x = kmemcheck_shadow_lookup(src_addr);
		if (x) {
			kmemcheck_save_addr(src_addr);
			for (i = 0; i < size; ++i)
				shadow[i] = x[i];
		} else {
			for (i = 0; i < size; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}
	} else {
		n = next_page - src_addr;
		BUG_ON(n > sizeof(shadow));

		/* First page */
		x = kmemcheck_shadow_lookup(src_addr);
		if (x) {
			kmemcheck_save_addr(src_addr);
			for (i = 0; i < n; ++i)
				shadow[i] = x[i];
		} else {
			/* Not tracked */
			for (i = 0; i < n; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}

		/* Second page */
		x = kmemcheck_shadow_lookup(next_page);
		if (x) {
			kmemcheck_save_addr(next_page);
			for (i = n; i < size; ++i)
				shadow[i] = x[i - n];
		} else {
			/* Not tracked */
			for (i = n; i < size; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}
	}

	page = dst_addr & PAGE_MASK;
	next_addr = dst_addr + size - 1;
	next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		/* Same page */
		x = kmemcheck_shadow_lookup(dst_addr);
		if (x) {
			kmemcheck_save_addr(dst_addr);
			for (i = 0; i < size; ++i) {
				x[i] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}
	} else {
		n = next_page - dst_addr;
		BUG_ON(n > sizeof(shadow));

		/* First page */
		x = kmemcheck_shadow_lookup(dst_addr);
		if (x) {
			kmemcheck_save_addr(dst_addr);
			for (i = 0; i < n; ++i) {
				x[i] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}

		/* Second page */
		x = kmemcheck_shadow_lookup(next_page);
		if (x) {
			kmemcheck_save_addr(next_page);
			for (i = n; i < size; ++i) {
				x[i - n] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}
	}

	status = kmemcheck_shadow_test(shadow, size);
	if (status == KMEMCHECK_SHADOW_INITIALIZED)
		return;

	if (kmemcheck_enabled)
		kmemcheck_error_save(status, src_addr, size, regs);

	if (kmemcheck_enabled == 2)
		kmemcheck_enabled = 0;
}

enum kmemcheck_method {
	KMEMCHECK_READ,
	KMEMCHECK_WRITE,
};

static void kmemcheck_access(struct pt_regs *regs,
	unsigned long fallback_address, enum kmemcheck_method fallback_method)
{
	const uint8_t *insn;
	const uint8_t *insn_primary;
	unsigned int size;

	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);

	/* Recursive fault -- ouch. */
	if (data->busy) {
		kmemcheck_show_addr(fallback_address);
		kmemcheck_error_save_bug(regs);
		return;
	}

	data->busy = true;

	insn = (const uint8_t *) regs->ip;
	insn_primary = kmemcheck_opcode_get_primary(insn);

	kmemcheck_opcode_decode(insn, &size);

	switch (insn_primary[0]) {
#ifdef CONFIG_KMEMCHECK_BITOPS_OK
		/* AND, OR, XOR */
		/*
		 * Unfortunately, these instructions have to be excluded from
		 * our regular checking since they access only some (and not
		 * all) bits. This clears out "bogus" bitfield-access warnings.
		 */
	case 0x80:
	case 0x81:
	case 0x82:
	case 0x83:
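		/*
		 * For the 0x80..0x83 immediate group, the reg field of the
		 * ModRM byte (bits 3..5 of the byte following the opcode)
		 * selects the ALU operation.
		 */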
		switch ((insn_primary[1] >> 3) & 7) {
			/* OR */
		case 1:
			/* AND */
		case 4:
			/* XOR */
		case 6:
			kmemcheck_write(regs, fallback_address, size);
			goto out;

			/* ADD */
		case 0:
			/* ADC */
		case 2:
			/* SBB */
		case 3:
			/* SUB */
		case 5:
			/* CMP */
		case 7:
			break;
		}
		break;
#endif

		/* MOVS, MOVSB, MOVSW, MOVSD */
	case 0xa4:
	case 0xa5:
		/*
		 * These instructions are special because they take two
		 * addresses, but we only get one page fault.
		 */
		kmemcheck_copy(regs, regs->si, regs->di, size);
		goto out;

		/* CMPS, CMPSB, CMPSW, CMPSD */
	case 0xa6:
	case 0xa7:
		kmemcheck_read(regs, regs->si, size);
		kmemcheck_read(regs, regs->di, size);
		goto out;
	}

	/*
	 * If the opcode isn't special in any way, we use the data from the
	 * page fault handler to determine the address and type of memory
	 * access.
	 */
	switch (fallback_method) {
	case KMEMCHECK_READ:
		kmemcheck_read(regs, fallback_address, size);
		goto out;
	case KMEMCHECK_WRITE:
		kmemcheck_write(regs, fallback_address, size);
		goto out;
	}

out:
	data->busy = false;
}

bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
	unsigned long error_code)
{
	pte_t *pte;

	/*
	 * XXX: Is it safe to assume that memory accesses from virtual 86
	 * mode or non-kernel code segments will _never_ access kernel
	 * memory (e.g. tracked pages)? For now, we need this to avoid
	 * invoking kmemcheck for PnP BIOS calls.
	 */
	if (regs->flags & X86_VM_MASK)
		return false;
	if (regs->cs != __KERNEL_CS)
		return false;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return false;

	WARN_ON_ONCE(in_nmi());

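	/* Bit 1 of the x86 page fault error code is set for write accesses. */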
	if (error_code & 2)
		kmemcheck_access(regs, address, KMEMCHECK_WRITE);
	else
		kmemcheck_access(regs, address, KMEMCHECK_READ);

	kmemcheck_show(regs);
	return true;
}

bool kmemcheck_trap(struct pt_regs *regs)
{
	if (!kmemcheck_active(regs))
		return false;

	/* We're done. */
	kmemcheck_hide(regs);
	return true;
}