// SPDX-License-Identifier: GPL-2.0
/*
 * Dynamic function tracing support.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 *
 * Thanks go to Ingo Molnar, for suggesting the idea.
 * Mathieu Desnoyers, for suggesting postponing the modifications.
 * Arjan van de Ven, for keeping me straight, and explaining to me
 * the dangers of modifying code on the run.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/memory.h>

#include <trace/syscall.h>

#include <asm/set_memory.h>
#include <asm/kprobes.h>
#include <asm/ftrace.h>
#include <asm/nops.h>
#include <asm/text-patching.h>

#ifdef CONFIG_DYNAMIC_FTRACE

int ftrace_arch_code_modify_prepare(void)
    __acquires(&text_mutex)
{
	mutex_lock(&text_mutex);
	set_kernel_text_rw();
	set_all_modules_text_rw();
	return 0;
}

int ftrace_arch_code_modify_post_process(void)
    __releases(&text_mutex)
{
	set_all_modules_text_ro();
	set_kernel_text_ro();
	mutex_unlock(&text_mutex);
	return 0;
}

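/*
 * An mcount/fentry site is a 5-byte instruction: a one-byte opcode
 * followed by a 32-bit relative displacement. This union lets us
 * build such an instruction and view it as raw bytes.
 */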
union ftrace_code_union {
	char code[MCOUNT_INSN_SIZE];
	struct {
		unsigned char op;
		int offset;
	} __attribute__((packed));
};

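/*
 * Compute the rel32 displacement from @ip to @addr. The caller passes
 * the address of the instruction *following* the call/jmp as @ip,
 * since x86 relative branches are relative to the next instruction.
 */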
static int ftrace_calc_offset(long ip, long addr)
{
	return (int)(addr - ip);
}

static unsigned char *
ftrace_text_replace(unsigned char op, unsigned long ip, unsigned long addr)
{
	static union ftrace_code_union calc;

	calc.op		= op;
	calc.offset	= ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);

	return calc.code;
}

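/* 0xe8 is the opcode of a near call with a rel32 displacement */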
static unsigned char *
ftrace_call_replace(unsigned long ip, unsigned long addr)
{
	return ftrace_text_replace(0xe8, ip, addr);
}

static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
	return addr >= start && addr < end;
}

static unsigned long text_ip_addr(unsigned long ip)
{
	/*
	 * On x86_64, kernel text is mapped read-only, so we use
	 * the kernel identity mapping instead of the kernel text mapping
	 * to modify the kernel text.
	 *
	 * For 32-bit kernels, these mappings are the same, so we can use
	 * the kernel identity mapping to modify code.
	 */
	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
		ip = (unsigned long)__va(__pa_symbol(ip));

	return ip;
}

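/* The 5-byte nop that can be patched atomically on this CPU */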
static const unsigned char *ftrace_nop_replace(void)
{
	return ideal_nops[NOP_ATOMIC5];
}

static int
ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
		   unsigned const char *new_code)
{
	unsigned char replaced[MCOUNT_INSN_SIZE];

	ftrace_expected = old_code;

	/*
	 * Note:
	 * We are paranoid about modifying text: if a bug were to happen, it
	 * could cause us to read or write to someplace that could cause harm.
	 * Carefully read and modify the code with probe_kernel_*(), and make
	 * sure what we read is what we expected it to be before modifying it.
	 */

	/* read the text we want to modify */
	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* Make sure it is what we expect it to be */
	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
		return -EINVAL;

	ip = text_ip_addr(ip);

	/* replace the text with the new text */
	if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
		return -EPERM;

	sync_core();

	return 0;
}

int ftrace_make_nop(struct module *mod,
		    struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned const char *new, *old;
	unsigned long ip = rec->ip;

	old = ftrace_call_replace(ip, addr);
	new = ftrace_nop_replace();

	/*
	 * On boot up, and when modules are loaded, the MCOUNT_ADDR
	 * is converted to a nop, and will never become MCOUNT_ADDR
	 * again. This code is either running before SMP (on boot up)
	 * or before the code will ever be executed (module load).
	 * We do not want to use the breakpoint version in this case,
	 * just modify the code directly.
	 */
	if (addr == MCOUNT_ADDR)
		return ftrace_modify_code_direct(rec->ip, old, new);

	ftrace_expected = NULL;

	/* Normal cases use add_brk_on_nop */
	WARN_ONCE(1, "invalid use of ftrace_make_nop");
	return -EINVAL;
}

int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned const char *new, *old;
	unsigned long ip = rec->ip;

	old = ftrace_nop_replace();
	new = ftrace_call_replace(ip, addr);

	/* Should only be called when module is loaded */
	return ftrace_modify_code_direct(rec->ip, old, new);
}

/*
 * The modifying_ftrace_code is used to tell the breakpoint
 * handler to call ftrace_int3_handler(). If it fails to
 * call this handler for a breakpoint added by ftrace, then
 * the kernel may crash.
 *
 * As atomic writes on x86 do not need a barrier, we do not
 * need to add smp_mb()s for this to work. It is also assumed
 * that a CPU cannot read modifying_ftrace_code before executing
 * the breakpoint; it would be quite remarkable if it could.
 * Here's the flow that is required:
 *
 *   CPU-0                          CPU-1
 *
 * atomic_inc(mfc);
 * write int3s
 *				<trap-int3> // implicit (r)mb
 *				if (atomic_read(mfc))
 *					call ftrace_int3_handler()
 *
 * Then when we are finished:
 *
 * atomic_dec(mfc);
 *
 * If we hit a breakpoint that was not set by ftrace, it does not
 * matter if ftrace_int3_handler() is called or not. It will
 * simply be ignored. But it is crucial that a ftrace nop/caller
 * breakpoint is handled. No other user should ever place a
 * breakpoint on an ftrace nop/caller location. It must only
 * be done by this code.
 */
atomic_t modifying_ftrace_code __read_mostly;

static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
		   unsigned const char *new_code);

/*
 * Should never be called:
 *  It is only called by __ftrace_replace_code(), which is called by
 *  ftrace_replace_code() (which x86 overrides), and by
 *  ftrace_update_code(), which turns mcount calls into nops or nops
 *  into function calls, but never converts a function from not using
 *  regs to one that uses regs -- which is what ftrace_modify_call()
 *  is for.
 */
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
				 unsigned long addr)
{
	WARN_ON(1);
	ftrace_expected = NULL;
	return -EINVAL;
}

static unsigned long ftrace_update_func;
static unsigned long ftrace_update_func_call;

static int update_ftrace_func(unsigned long ip, void *new)
{
	unsigned char old[MCOUNT_INSN_SIZE];
	int ret;

	memcpy(old, (void *)ip, MCOUNT_INSN_SIZE);

	ftrace_update_func = ip;
	/* Make sure the breakpoints see the ftrace_update_func update */
	smp_wmb();

	/* See comment above by declaration of modifying_ftrace_code */
	atomic_inc(&modifying_ftrace_code);

	ret = ftrace_modify_code(ip, old, new);

	atomic_dec(&modifying_ftrace_code);

	return ret;
}

int ftrace_update_ftrace_func(ftrace_func_t func)
{
	unsigned long ip = (unsigned long)(&ftrace_call);
	unsigned char *new;
	int ret;

	ftrace_update_func_call = (unsigned long)func;

	new = ftrace_call_replace(ip, (unsigned long)func);
	ret = update_ftrace_func(ip, new);

	/* Also update the regs callback function */
	if (!ret) {
		ip = (unsigned long)(&ftrace_regs_call);
		new = ftrace_call_replace(ip, (unsigned long)func);
		ret = update_ftrace_func(ip, new);
	}

	return ret;
}

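/* Is @ip the call site currently being updated by update_ftrace_func()? */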
static int is_ftrace_caller(unsigned long ip)
{
	if (ip == ftrace_update_func)
		return 1;

	return 0;
}

/*
 * A breakpoint was added to the code address we are about to
 * modify, and this is the handler that will just skip over it.
 * We are either changing a nop into a trace call, or a trace
 * call to a nop. While the change is taking place, we treat
 * it just like it was a nop.
 */
int ftrace_int3_handler(struct pt_regs *regs)
{
	unsigned long ip;

	if (WARN_ON_ONCE(!regs))
		return 0;

	ip = regs->ip - INT3_INSN_SIZE;

#ifdef CONFIG_X86_64
	if (ftrace_location(ip)) {
		int3_emulate_call(regs, (unsigned long)ftrace_regs_caller);
		return 1;
	} else if (is_ftrace_caller(ip)) {
		if (!ftrace_update_func_call) {
			int3_emulate_jmp(regs, ip + CALL_INSN_SIZE);
			return 1;
		}
		int3_emulate_call(regs, ftrace_update_func_call);
		return 1;
	}
#else
	if (ftrace_location(ip) || is_ftrace_caller(ip)) {
		int3_emulate_jmp(regs, ip + CALL_INSN_SIZE);
		return 1;
	}
#endif

	return 0;
}

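/* Write @size bytes of @val at @ip, going through the identity mapping */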
static int ftrace_write(unsigned long ip, const char *val, int size)
{
	ip = text_ip_addr(ip);

	if (probe_kernel_write((void *)ip, val, size))
		return -EPERM;

	return 0;
}

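/*
 * Replace the first byte at @ip with an int3 (0xcc), after verifying
 * that the current instruction matches @old. Any CPU that hits the
 * int3 while the rest of the instruction is being patched is caught
 * by ftrace_int3_handler().
 */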
static int add_break(unsigned long ip, const char *old)
{
	unsigned char replaced[MCOUNT_INSN_SIZE];
	unsigned char brk = BREAKPOINT_INSTRUCTION;

	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	ftrace_expected = old;

	/* Make sure it is what we expect it to be */
	if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
		return -EINVAL;

	return ftrace_write(ip, &brk, 1);
}

static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned const char *old;
	unsigned long ip = rec->ip;

	old = ftrace_call_replace(ip, addr);

	return add_break(rec->ip, old);
}


static int add_brk_on_nop(struct dyn_ftrace *rec)
{
	unsigned const char *old;

	old = ftrace_nop_replace();

	return add_break(rec->ip, old);
}

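/*
 * Step one of ftrace_replace_code(): put a breakpoint on every
 * mcount/fentry site that is about to change, so no CPU can execute
 * a half-modified instruction.
 */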
static int add_breakpoints(struct dyn_ftrace *rec, int enable)
{
	unsigned long ftrace_addr;
	int ret;

	ftrace_addr = ftrace_get_addr_curr(rec);

	ret = ftrace_test_record(rec, enable);

	switch (ret) {
	case FTRACE_UPDATE_IGNORE:
		return 0;

	case FTRACE_UPDATE_MAKE_CALL:
		/* converting nop to call */
		return add_brk_on_nop(rec);

	case FTRACE_UPDATE_MODIFY_CALL:
	case FTRACE_UPDATE_MAKE_NOP:
		/* converting a call to a nop */
		return add_brk_on_call(rec, ftrace_addr);
	}
	return 0;
}

/*
 * On error, we need to remove breakpoints. This needs to
 * be done carefully. If the address does not currently have a
 * breakpoint, we know we are done. Otherwise, we look at the
 * remaining 4 bytes of the instruction. If they match a nop
 * we replace the breakpoint with the nop. Otherwise we replace
 * it with the call instruction.
 */
static int remove_breakpoint(struct dyn_ftrace *rec)
{
	unsigned char ins[MCOUNT_INSN_SIZE];
	unsigned char brk = BREAKPOINT_INSTRUCTION;
	const unsigned char *nop;
	unsigned long ftrace_addr;
	unsigned long ip = rec->ip;

	/* If we fail the read, just give up */
	if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* If this does not have a breakpoint, we are done */
	if (ins[0] != brk)
		return 0;

	nop = ftrace_nop_replace();

	/*
	 * If the last 4 bytes of the instruction do not match
	 * a nop, then we assume that this is a call to ftrace_addr.
	 */
	if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
		/*
		 * As an extra sanity check, we verify that the breakpoint
		 * is on a call that would actually jump to the ftrace_addr.
		 * If not, don't touch the breakpoint; we might just create
		 * a disaster.
		 */
		ftrace_addr = ftrace_get_addr_new(rec);
		nop = ftrace_call_replace(ip, ftrace_addr);

		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
			goto update;

		/* Check both ftrace_addr and ftrace_old_addr */
		ftrace_addr = ftrace_get_addr_curr(rec);
		nop = ftrace_call_replace(ip, ftrace_addr);

		ftrace_expected = nop;

		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
			return -EINVAL;
	}

 update:
	return ftrace_write(ip, nop, 1);
}

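/*
 * Step two: with the int3 still in place on the first byte, it is
 * safe to rewrite the remaining four bytes of the instruction.
 */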
static int add_update_code(unsigned long ip, unsigned const char *new)
{
	/* skip breakpoint */
	ip++;
	new++;
	return ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1);
}

static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_call_replace(ip, addr);
	return add_update_code(ip, new);
}

static int add_update_nop(struct dyn_ftrace *rec)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_nop_replace();
	return add_update_code(ip, new);
}

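/* Step two, per record: pick the new call target or nop and patch its tail */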
static int add_update(struct dyn_ftrace *rec, int enable)
{
	unsigned long ftrace_addr;
	int ret;

	ret = ftrace_test_record(rec, enable);

	ftrace_addr  = ftrace_get_addr_new(rec);

	switch (ret) {
	case FTRACE_UPDATE_IGNORE:
		return 0;

	case FTRACE_UPDATE_MODIFY_CALL:
	case FTRACE_UPDATE_MAKE_CALL:
		/* converting nop to call */
		return add_update_call(rec, ftrace_addr);

	case FTRACE_UPDATE_MAKE_NOP:
		/* converting a call to a nop */
		return add_update_nop(rec);
	}

	return 0;
}

static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_call_replace(ip, addr);

	return ftrace_write(ip, new, 1);
}

static int finish_update_nop(struct dyn_ftrace *rec)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_nop_replace();

	return ftrace_write(ip, new, 1);
}

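/*
 * Step three, per record: commit the update and replace the int3
 * with the first byte of the new instruction.
 */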
static int finish_update(struct dyn_ftrace *rec, int enable)
{
	unsigned long ftrace_addr;
	int ret;

	ret = ftrace_update_record(rec, enable);

	ftrace_addr = ftrace_get_addr_new(rec);

	switch (ret) {
	case FTRACE_UPDATE_IGNORE:
		return 0;

	case FTRACE_UPDATE_MODIFY_CALL:
	case FTRACE_UPDATE_MAKE_CALL:
		/* converting nop to call */
		return finish_update_call(rec, ftrace_addr);

	case FTRACE_UPDATE_MAKE_NOP:
		/* converting a call to a nop */
		return finish_update_nop(rec);
	}

	return 0;
}

static void do_sync_core(void *data)
{
	sync_core();
}

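/*
 * Make every online CPU execute a serializing instruction, so that
 * all CPUs see the current stage of the modification before we move
 * on to the next one.
 */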
static void run_sync(void)
{
	int enable_irqs;

	/* No need to sync if there's only one CPU */
	if (num_online_cpus() == 1)
		return;

	enable_irqs = irqs_disabled();

	/* We may be called with interrupts disabled (on bootup). */
	if (enable_irqs)
		local_irq_enable();
	on_each_cpu(do_sync_core, NULL, 1);
	if (enable_irqs)
		local_irq_disable();
}

void ftrace_replace_code(int enable)
{
	struct ftrace_rec_iter *iter;
	struct dyn_ftrace *rec;
	const char *report = "adding breakpoints";
	int count = 0;
	int ret;

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		ret = add_breakpoints(rec, enable);
		if (ret)
			goto remove_breakpoints;
		count++;
	}

	run_sync();

	report = "updating code";
	count = 0;

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		ret = add_update(rec, enable);
		if (ret)
			goto remove_breakpoints;
		count++;
	}

	run_sync();

	report = "removing breakpoints";
	count = 0;

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		ret = finish_update(rec, enable);
		if (ret)
			goto remove_breakpoints;
		count++;
	}

	run_sync();

	return;

 remove_breakpoints:
	pr_warn("Failed on %s (%d):\n", report, count);
	ftrace_bug(ret, rec);
	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);
		/*
		 * Breakpoints are only handled while this function is in
		 * progress; the system could not work with them left in
		 * place.
		 */
		if (remove_breakpoint(rec))
			BUG();
	}
	run_sync();
}

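/*
 * Modify a single site with the same three-step int3 dance used by
 * ftrace_replace_code(): breakpoint, patch the tail, restore the
 * first byte, with a sync between each step.
 */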
static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
		   unsigned const char *new_code)
{
	int ret;

	ret = add_break(ip, old_code);
	if (ret)
		goto out;

	run_sync();

	ret = add_update_code(ip, new_code);
	if (ret)
		goto fail_update;

	run_sync();

	ret = ftrace_write(ip, new_code, 1);
	/*
	 * The breakpoint is handled only when this function is in progress.
	 * The system could not work if we could not remove it.
	 */
	BUG_ON(ret);
 out:
	run_sync();
	return ret;

 fail_update:
	/* Also here the system could not work with the breakpoint */
	if (ftrace_write(ip, old_code, 1))
		BUG();
	goto out;
}

void arch_ftrace_update_code(int command)
{
	/* See comment above by declaration of modifying_ftrace_code */
	atomic_inc(&modifying_ftrace_code);

	ftrace_modify_all_code(command);

	atomic_dec(&modifying_ftrace_code);
}

int __init ftrace_dyn_arch_init(void)
{
	return 0;
}

/* Currently only x86_64 supports dynamic trampolines */
#ifdef CONFIG_X86_64

#ifdef CONFIG_MODULES
#include <linux/moduleloader.h>
/* Module allocation simplifies allocating memory for code */
static inline void *alloc_tramp(unsigned long size)
{
	return module_alloc(size);
}
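/* Make the trampoline non-executable and writable again before freeing it */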
static inline void tramp_free(void *tramp, int size)
{
	int npages = PAGE_ALIGN(size) >> PAGE_SHIFT;

	set_memory_nx((unsigned long)tramp, npages);
	set_memory_rw((unsigned long)tramp, npages);
	module_memfree(tramp);
}
#else
/* Trampolines can only be created if modules are supported */
static inline void *alloc_tramp(unsigned long size)
{
	return NULL;
}
static inline void tramp_free(void *tramp, int size) { }
#endif

/* Defined as markers to the end of the ftrace default trampolines */
extern void ftrace_regs_caller_end(void);
extern void ftrace_epilogue(void);
extern void ftrace_caller_op_ptr(void);
extern void ftrace_regs_caller_op_ptr(void);

/* movq function_trace_op(%rip), %rdx */
/* 0x48 0x8b 0x15 <offset-to-function_trace_op (4 bytes)> */
#define OP_REF_SIZE	7

/*
 * The ftrace_ops is passed to the function callback. Since the
 * trampoline only services a single ftrace_ops, we can pass in
 * that ops directly.
 *
 * The ftrace_op_code_union is used to create a pointer to the
 * ftrace_ops that will be passed to the callback function.
 */
union ftrace_op_code_union {
	char code[OP_REF_SIZE];
	struct {
		char op[3];
		int offset;
	} __attribute__((packed));
};

#define RET_SIZE		1

static unsigned long
create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
{
	unsigned long start_offset;
	unsigned long end_offset;
	unsigned long op_offset;
	unsigned long offset;
	unsigned long npages;
	unsigned long size;
	unsigned long retq;
	unsigned long *ptr;
	void *trampoline;
	void *ip;
	/* 48 8b 15 <offset> is movq <offset>(%rip), %rdx */
	unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 };
	union ftrace_op_code_union op_ptr;
	int ret;

	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
		start_offset = (unsigned long)ftrace_regs_caller;
		end_offset = (unsigned long)ftrace_regs_caller_end;
		op_offset = (unsigned long)ftrace_regs_caller_op_ptr;
	} else {
		start_offset = (unsigned long)ftrace_caller;
		end_offset = (unsigned long)ftrace_epilogue;
		op_offset = (unsigned long)ftrace_caller_op_ptr;
	}

	size = end_offset - start_offset;

	/*
	 * Allocate enough space to store the ftrace_caller code,
	 * the trailing return instruction, as well as the address of
	 * the ftrace_ops this trampoline is used for.
	 */
	trampoline = alloc_tramp(size + RET_SIZE + sizeof(void *));
	if (!trampoline)
		return 0;

	*tramp_size = size + RET_SIZE + sizeof(void *);
	npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE);

	/* Copy ftrace_caller onto the trampoline memory */
	ret = probe_kernel_read(trampoline, (void *)start_offset, size);
	if (WARN_ON(ret < 0))
		goto fail;

	ip = trampoline + size;

	/* The trampoline ends with ret(q) */
	retq = (unsigned long)ftrace_stub;
	ret = probe_kernel_read(ip, (void *)retq, RET_SIZE);
	if (WARN_ON(ret < 0))
		goto fail;

	/*
	 * The address of the ftrace_ops that is used for this trampoline
	 * is stored at the end of the trampoline. This will be used to
	 * load the third parameter for the callback. Basically, that
	 * location at the end of the trampoline takes the place of
	 * the global function_trace_op variable.
	 */

	ptr = (unsigned long *)(trampoline + size + RET_SIZE);
	*ptr = (unsigned long)ops;

	op_offset -= start_offset;
	memcpy(&op_ptr, trampoline + op_offset, OP_REF_SIZE);

	/* Are we pointing to the reference? */
	if (WARN_ON(memcmp(op_ptr.op, op_ref, 3) != 0))
		goto fail;

	/* Load the contents of ptr into the callback parameter */
	offset = (unsigned long)ptr;
	offset -= (unsigned long)trampoline + op_offset + OP_REF_SIZE;

	op_ptr.offset = offset;

	/* put in the new offset to the ftrace_ops */
	memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE);

	/* ALLOC_TRAMP flags lets us know we created it */
	ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;

	/*
	 * Module allocation needs to be completed by making the page
	 * executable. The page is still writable, which is a security hazard,
	 * but anyhow ftrace breaks W^X completely.
	 */
	set_memory_x((unsigned long)trampoline, npages);
	return (unsigned long)trampoline;
fail:
	tramp_free(trampoline, *tramp_size);
	return 0;
}

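/*
 * Offset of the ftrace_call/ftrace_regs_call site within the caller
 * body, and hence within any trampoline copied from it.
 */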
static unsigned long calc_trampoline_call_offset(bool save_regs)
{
	unsigned long start_offset;
	unsigned long call_offset;

	if (save_regs) {
		start_offset = (unsigned long)ftrace_regs_caller;
		call_offset = (unsigned long)ftrace_regs_call;
	} else {
		start_offset = (unsigned long)ftrace_caller;
		call_offset = (unsigned long)ftrace_call;
	}

	return call_offset - start_offset;
}

void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
{
	ftrace_func_t func;
	unsigned char *new;
	unsigned long offset;
	unsigned long ip;
	unsigned int size;
	int ret, npages;

	if (ops->trampoline) {
		/*
		 * The ftrace_ops caller may set up its own trampoline.
		 * In such a case, this code must not modify it.
		 */
		if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
			return;
		npages = PAGE_ALIGN(ops->trampoline_size) >> PAGE_SHIFT;
		set_memory_rw(ops->trampoline, npages);
	} else {
		ops->trampoline = create_trampoline(ops, &size);
		if (!ops->trampoline)
			return;
		ops->trampoline_size = size;
		npages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	}

	offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
	ip = ops->trampoline + offset;

	func = ftrace_ops_get_func(ops);

	ftrace_update_func_call = (unsigned long)func;

	/* Do a safe modify in case the trampoline is executing */
	new = ftrace_call_replace(ip, (unsigned long)func);
	ret = update_ftrace_func(ip, new);
	set_memory_ro(ops->trampoline, npages);

	/* The update should never fail */
	WARN_ON(ret);
}

/* Return the address of the function the trampoline calls */
static void *addr_from_call(void *ptr)
{
	union ftrace_code_union calc;
	int ret;

	ret = probe_kernel_read(&calc, ptr, MCOUNT_INSN_SIZE);
	if (WARN_ON_ONCE(ret < 0))
		return NULL;

	/* Make sure this is a call */
	if (WARN_ON_ONCE(calc.op != 0xe8)) {
		pr_warn("Expected e8, got %x\n", calc.op);
		return NULL;
	}

	return ptr + MCOUNT_INSN_SIZE + calc.offset;
}

void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
			   unsigned long frame_pointer);

/*
 * If the ops->trampoline was not allocated, then it probably
 * has a static trampoline func, or is the ftrace caller itself.
 */
static void *static_tramp_func(struct ftrace_ops *ops, struct dyn_ftrace *rec)
{
	unsigned long offset;
	bool save_regs = rec->flags & FTRACE_FL_REGS_EN;
	void *ptr;

	if (ops && ops->trampoline) {
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
		/*
		 * The function graph tracer is the only user we know of
		 * that sets a static trampoline.
		 */
		if (ops->trampoline == FTRACE_GRAPH_ADDR)
			return (void *)prepare_ftrace_return;
#endif
		return NULL;
	}

	offset = calc_trampoline_call_offset(save_regs);

	if (save_regs)
		ptr = (void *)FTRACE_REGS_ADDR + offset;
	else
		ptr = (void *)FTRACE_ADDR + offset;

	return addr_from_call(ptr);
}

void *arch_ftrace_trampoline_func(struct ftrace_ops *ops, struct dyn_ftrace *rec)
{
	unsigned long offset;

	/* If we didn't allocate this trampoline, consider it static */
	if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
		return static_tramp_func(ops, rec);

	offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
	return addr_from_call((void *)ops->trampoline + offset);
}

void arch_ftrace_trampoline_free(struct ftrace_ops *ops)
{
	if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
		return;

	tramp_free((void *)ops->trampoline, ops->trampoline_size);
	ops->trampoline = 0;
}

#endif /* CONFIG_X86_64 */
#endif /* CONFIG_DYNAMIC_FTRACE */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER

#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void);

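/* 0xe9 is the opcode of a near jmp with a rel32 displacement */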
static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
{
	return ftrace_text_replace(0xe9, ip, addr);
}

static int ftrace_mod_jmp(unsigned long ip, void *func)
{
	unsigned char *new;

	ftrace_update_func_call = 0UL;
	new = ftrace_jmp_replace(ip, (unsigned long)func);

	return update_ftrace_func(ip, new);
}

int ftrace_enable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);

	return ftrace_mod_jmp(ip, &ftrace_graph_caller);
}

int ftrace_disable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);

	return ftrace_mod_jmp(ip, &ftrace_stub);
}

#endif /* CONFIG_DYNAMIC_FTRACE */

/*
 * Hook the return address and push it onto the stack of return
 * addresses in the current thread info.
 */
void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
			   unsigned long frame_pointer)
{
	unsigned long old;
	int faulted;
	unsigned long return_hooker = (unsigned long)
				&return_to_handler;

	/*
	 * When resuming from suspend-to-ram, this function can be indirectly
	 * called from early CPU startup code while the CPU is in real mode,
	 * which would fail miserably.  Make sure the stack pointer is a
	 * virtual address.
	 *
	 * This check isn't as accurate as virt_addr_valid(), but it should be
	 * good enough for this purpose, and it's fast.
	 */
	if (unlikely((long)__builtin_frame_address(0) >= 0))
		return;

	if (unlikely(ftrace_graph_is_dead()))
		return;

	if (unlikely(atomic_read(&current->tracing_graph_pause)))
		return;

	/*
	 * Protect against a fault, even if it shouldn't
	 * happen. This tool is too intrusive to forgo
	 * such protection.
	 */
	asm volatile(
		"1: " _ASM_MOV " (%[parent]), %[old]\n"
		"2: " _ASM_MOV " %[return_hooker], (%[parent])\n"
		"   movl $0, %[faulted]\n"
		"3:\n"

		".section .fixup, \"ax\"\n"
		"4: movl $1, %[faulted]\n"
		"   jmp 3b\n"
		".previous\n"

		_ASM_EXTABLE(1b, 4b)
		_ASM_EXTABLE(2b, 4b)

		: [old] "=&r" (old), [faulted] "=r" (faulted)
		: [parent] "r" (parent), [return_hooker] "r" (return_hooker)
		: "memory"
	);

	if (unlikely(faulted)) {
		ftrace_graph_stop();
		WARN_ON(1);
		return;
	}

	if (function_graph_enter(old, self_addr, frame_pointer, parent))
		*parent = old;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */