// SPDX-License-Identifier: GPL-2.0
/*
 * Kprobes-based tracing events
 *
 * Created by Masami Hiramatsu <mhiramat@redhat.com>
 *
 */
#define pr_fmt(fmt)	"trace_kprobe: " fmt

#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/rculist.h>
#include <linux/error-injection.h>

#include "trace_kprobe_selftest.h"
#include "trace_probe.h"

#define KPROBE_EVENT_SYSTEM "kprobes"
#define KRETPROBE_MAXACTIVE_MAX 4096

/*
 * Kprobe event core functions
 */
struct trace_kprobe {
	struct list_head	list;
	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
	unsigned long __percpu *nhit;
	const char		*symbol;	/* symbol name */
	struct trace_probe	tp;
};

#define SIZEOF_TRACE_KPROBE(n)				\
	(offsetof(struct trace_kprobe, tp.args) +	\
	(sizeof(struct probe_arg) * (n)))
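/*
 * tp.args is a flexible tail: alloc_trace_kprobe() below allocates
 * SIZEOF_TRACE_KPROBE(nargs) bytes in one go, so that
 * tk->tp.args[0..nargs-1] are all valid.
 */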

static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
{
	return tk->rp.handler != NULL;
}

static nokprobe_inline const char *trace_kprobe_symbol(struct trace_kprobe *tk)
{
	return tk->symbol ? tk->symbol : "unknown";
}

static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk)
{
	return tk->rp.kp.offset;
}

static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
{
	return !!(kprobe_gone(&tk->rp.kp));
}

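/*
 * A symbol of the form "MOD:SYM" lives in module MOD; e.g. a probe on
 * "btrfs:btrfs_sync_file" is within the "btrfs" module.
 */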
static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
						 struct module *mod)
{
	int len = strlen(mod->name);
	const char *name = trace_kprobe_symbol(tk);
	return strncmp(mod->name, name, len) == 0 && name[len] == ':';
}

static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
{
	char *p;
	bool ret;

	if (!tk->symbol)
		return false;
	p = strchr(tk->symbol, ':');
	if (!p)
		return true;
	*p = '\0';
	mutex_lock(&module_mutex);
	ret = !!find_module(tk->symbol);
	mutex_unlock(&module_mutex);
	*p = ':';

	return ret;
}

static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
{
	unsigned long nhit = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		nhit += *per_cpu_ptr(tk->nhit, cpu);

	return nhit;
}

/* Return 0 if it fails to find the symbol address */
static nokprobe_inline
unsigned long trace_kprobe_address(struct trace_kprobe *tk)
{
	unsigned long addr;

	if (tk->symbol) {
		addr = (unsigned long)
			kallsyms_lookup_name(trace_kprobe_symbol(tk));
		if (addr)
			addr += tk->rp.kp.offset;
	} else {
		addr = (unsigned long)tk->rp.kp.addr;
	}
	return addr;
}

bool trace_kprobe_on_func_entry(struct trace_event_call *call)
{
	struct trace_kprobe *tk = (struct trace_kprobe *)call->data;

	return kprobe_on_func_entry(tk->rp.kp.addr,
			tk->rp.kp.addr ? NULL : tk->rp.kp.symbol_name,
			tk->rp.kp.addr ? 0 : tk->rp.kp.offset);
}

bool trace_kprobe_error_injectable(struct trace_event_call *call)
{
	struct trace_kprobe *tk = (struct trace_kprobe *)call->data;

	return within_error_injection_list(trace_kprobe_address(tk));
}

static int register_kprobe_event(struct trace_kprobe *tk);
static int unregister_kprobe_event(struct trace_kprobe *tk);

static DEFINE_MUTEX(probe_lock);
static LIST_HEAD(probe_list);

static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
				struct pt_regs *regs);

/* Memory fetching by symbol */
struct symbol_cache {
	char		*symbol;
	long		offset;
	unsigned long	addr;
};

unsigned long update_symbol_cache(struct symbol_cache *sc)
{
	sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);

	if (sc->addr)
		sc->addr += sc->offset;

	return sc->addr;
}

void free_symbol_cache(struct symbol_cache *sc)
{
	kfree(sc->symbol);
	kfree(sc);
}

struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
{
	struct symbol_cache *sc;

	if (!sym || strlen(sym) == 0)
		return NULL;

	sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
	if (!sc)
		return NULL;

	sc->symbol = kstrdup(sym, GFP_KERNEL);
	if (!sc->symbol) {
		kfree(sc);
		return NULL;
	}
	sc->offset = offset;
	update_symbol_cache(sc);

	return sc;
}

/*
 * Kprobes-specific fetch functions
 */
#define DEFINE_FETCH_stack(type)					\
static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,		\
					  void *offset, void *dest)	\
{									\
	*(type *)dest = (type)regs_get_kernel_stack_nth(regs,		\
				(unsigned int)((unsigned long)offset));	\
}									\
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(stack, type));

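/*
 * FETCH_FUNC_NAME(method, type) expands to fetch_<method>_<type>, and
 * DEFINE_BASIC_FETCH_FUNCS(method) instantiates the DEFINE_FETCH_<method>
 * template for each basic type (both defined in trace_probe.h), so the
 * line below defines fetch_stack_u8() through fetch_stack_u64().
 */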
DEFINE_BASIC_FETCH_FUNCS(stack)
/* No string on the stack entry */
#define fetch_stack_string	NULL
#define fetch_stack_string_size	NULL

#define DEFINE_FETCH_memory(type)					\
static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,		\
					  void *addr, void *dest)	\
{									\
	type retval;							\
	if (probe_kernel_address(addr, retval))				\
		*(type *)dest = 0;					\
	else								\
		*(type *)dest = retval;					\
}									\
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, type));

DEFINE_BASIC_FETCH_FUNCS(memory)
/*
 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
 * length and relative data location.
 */
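/*
 * The u32 at *dest packs a "data relative location": per make_data_rloc()
 * in trace_probe.h, the upper 16 bits hold the maximum (and, on return,
 * the actual) string length, and the lower 16 bits hold the offset of the
 * string data relative to the entry.
 */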
static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
					    void *addr, void *dest)
{
	int maxlen = get_rloc_len(*(u32 *)dest);
	u8 *dst = get_rloc_data(dest);
	long ret;

	if (!maxlen)
		return;

	/*
	 * Try to get the string again, since it can change while we are
	 * probing.
	 */
	ret = strncpy_from_unsafe(dst, addr, maxlen);

	if (ret < 0) {	/* Failed to fetch string */
		dst[0] = '\0';
		*(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
	} else {
		*(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest));
	}
}
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string));

/* Return the length of the string -- including the terminating NUL byte */
static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
						 void *addr, void *dest)
{
	mm_segment_t old_fs;
	int ret, len = 0;
	u8 c;

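	/*
	 * Widen the address limit so that the user-access helper below
	 * may read a kernel address; page faults stay disabled since we
	 * may run in atomic (probe handler) context.
	 */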
	old_fs = get_fs();
	set_fs(KERNEL_DS);
	pagefault_disable();

	do {
		ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
		len++;
	} while (c && ret == 0 && len < MAX_STRING_SIZE);

	pagefault_enable();
	set_fs(old_fs);

	if (ret < 0)	/* Failed to check the length */
		*(u32 *)dest = 0;
	else
		*(u32 *)dest = len;
}
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string_size));

#define DEFINE_FETCH_symbol(type)					\
void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, void *data, void *dest)\
{									\
	struct symbol_cache *sc = data;					\
	if (sc->addr)							\
		fetch_memory_##type(regs, (void *)sc->addr, dest);	\
	else								\
		*(type *)dest = 0;					\
}									\
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(symbol, type));

DEFINE_BASIC_FETCH_FUNCS(symbol)
DEFINE_FETCH_symbol(string)
DEFINE_FETCH_symbol(string_size)

/* kprobes don't support file_offset fetch methods */
#define fetch_file_offset_u8		NULL
#define fetch_file_offset_u16		NULL
#define fetch_file_offset_u32		NULL
#define fetch_file_offset_u64		NULL
#define fetch_file_offset_string	NULL
#define fetch_file_offset_string_size	NULL

/* Fetch type information table */
static const struct fetch_type kprobes_fetch_type_table[] = {
	/* Special types */
	[FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
					sizeof(u32), 1, "__data_loc char[]"),
	[FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
					string_size, sizeof(u32), 0, "u32"),
	/* Basic types */
	ASSIGN_FETCH_TYPE(u8,  u8,  0),
	ASSIGN_FETCH_TYPE(u16, u16, 0),
	ASSIGN_FETCH_TYPE(u32, u32, 0),
	ASSIGN_FETCH_TYPE(u64, u64, 0),
	ASSIGN_FETCH_TYPE(s8,  u8,  1),
	ASSIGN_FETCH_TYPE(s16, u16, 1),
	ASSIGN_FETCH_TYPE(s32, u32, 1),
	ASSIGN_FETCH_TYPE(s64, u64, 1),
	ASSIGN_FETCH_TYPE_ALIAS(x8,  u8,  u8,  0),
	ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0),
	ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0),
	ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0),

	ASSIGN_FETCH_TYPE_END
};

/*
 * Allocate new trace_probe and initialize it (including kprobes).
 */
static struct trace_kprobe *alloc_trace_kprobe(const char *group,
					     const char *event,
					     void *addr,
					     const char *symbol,
					     unsigned long offs,
					     int maxactive,
					     int nargs, bool is_return)
{
	struct trace_kprobe *tk;
	int ret = -ENOMEM;

	tk = kzalloc(SIZEOF_TRACE_KPROBE(nargs), GFP_KERNEL);
	if (!tk)
		return ERR_PTR(ret);

	tk->nhit = alloc_percpu(unsigned long);
	if (!tk->nhit)
		goto error;

	if (symbol) {
		tk->symbol = kstrdup(symbol, GFP_KERNEL);
		if (!tk->symbol)
			goto error;
		tk->rp.kp.symbol_name = tk->symbol;
		tk->rp.kp.offset = offs;
	} else
		tk->rp.kp.addr = addr;

	if (is_return)
		tk->rp.handler = kretprobe_dispatcher;
	else
		tk->rp.kp.pre_handler = kprobe_dispatcher;

	tk->rp.maxactive = maxactive;

	if (!event || !is_good_name(event)) {
		ret = -EINVAL;
		goto error;
	}

	tk->tp.call.class = &tk->tp.class;
	tk->tp.call.name = kstrdup(event, GFP_KERNEL);
	if (!tk->tp.call.name)
		goto error;

	if (!group || !is_good_name(group)) {
		ret = -EINVAL;
		goto error;
	}

	tk->tp.class.system = kstrdup(group, GFP_KERNEL);
	if (!tk->tp.class.system)
		goto error;

	INIT_LIST_HEAD(&tk->list);
	INIT_LIST_HEAD(&tk->tp.files);
	return tk;
error:
	kfree(tk->tp.call.name);
	kfree(tk->symbol);
	free_percpu(tk->nhit);
	kfree(tk);
	return ERR_PTR(ret);
}

static void free_trace_kprobe(struct trace_kprobe *tk)
{
	int i;

	for (i = 0; i < tk->tp.nr_args; i++)
		traceprobe_free_probe_arg(&tk->tp.args[i]);

	kfree(tk->tp.call.class->system);
	kfree(tk->tp.call.name);
	kfree(tk->symbol);
	free_percpu(tk->nhit);
	kfree(tk);
}

static struct trace_kprobe *find_trace_kprobe(const char *event,
					      const char *group)
{
	struct trace_kprobe *tk;

	list_for_each_entry(tk, &probe_list, list)
		if (strcmp(trace_event_name(&tk->tp.call), event) == 0 &&
		    strcmp(tk->tp.call.class->system, group) == 0)
			return tk;
	return NULL;
}

static inline int __enable_trace_kprobe(struct trace_kprobe *tk)
{
	int ret = 0;

	if (trace_probe_is_registered(&tk->tp) && !trace_kprobe_has_gone(tk)) {
		if (trace_kprobe_is_return(tk))
			ret = enable_kretprobe(&tk->rp);
		else
			ret = enable_kprobe(&tk->rp.kp);
	}

	return ret;
}

/*
 * Enable trace_probe.
 * If the file is NULL, enable the "perf" handler; otherwise enable the
 * "trace" handler.
 */
static int
enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
{
	struct event_file_link *link;
	int ret = 0;

	if (file) {
		link = kmalloc(sizeof(*link), GFP_KERNEL);
		if (!link) {
			ret = -ENOMEM;
			goto out;
		}

		link->file = file;
		list_add_tail_rcu(&link->list, &tk->tp.files);

		tk->tp.flags |= TP_FLAG_TRACE;
		ret = __enable_trace_kprobe(tk);
		if (ret) {
			list_del_rcu(&link->list);
			kfree(link);
			tk->tp.flags &= ~TP_FLAG_TRACE;
		}

	} else {
		tk->tp.flags |= TP_FLAG_PROFILE;
		ret = __enable_trace_kprobe(tk);
		if (ret)
			tk->tp.flags &= ~TP_FLAG_PROFILE;
	}
 out:
	return ret;
}

/*
 * Disable trace_probe.
 * If the file is NULL, disable the "perf" handler; otherwise disable the
 * "trace" handler.
 */
static int
disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
{
	struct event_file_link *link = NULL;
	int wait = 0;
	int ret = 0;

	if (file) {
		link = find_event_file_link(&tk->tp, file);
		if (!link) {
			ret = -EINVAL;
			goto out;
		}

		list_del_rcu(&link->list);
		wait = 1;
		if (!list_empty(&tk->tp.files))
			goto out;

		tk->tp.flags &= ~TP_FLAG_TRACE;
	} else
		tk->tp.flags &= ~TP_FLAG_PROFILE;

	if (!trace_probe_is_enabled(&tk->tp) && trace_probe_is_registered(&tk->tp)) {
		if (trace_kprobe_is_return(tk))
			disable_kretprobe(&tk->rp);
		else
			disable_kprobe(&tk->rp.kp);
		wait = 1;
	}

	/*
	 * If tk is not added to any list, it must be a local trace_kprobe
	 * created with perf_event_open. We don't need to wait for these
	 * trace_kprobes.
	 */
	if (list_empty(&tk->list))
		wait = 0;
 out:
	if (wait) {
		/*
		 * Synchronize with kprobe_trace_func/kretprobe_trace_func
		 * to ensure the probe is disabled (all running handlers
		 * have finished). This is not only for kfree(); the caller,
		 * trace_remove_event_call(), also relies on it when releasing
		 * event_call related objects, which are accessed in
		 * kprobe_trace_func/kretprobe_trace_func.
		 */
		synchronize_sched();
		kfree(link);	/* Ignored if link == NULL */
	}

	return ret;
}

#if defined(CONFIG_KPROBES_ON_FTRACE) && \
	!defined(CONFIG_KPROBE_EVENTS_ON_NOTRACE)
static bool __within_notrace_func(unsigned long addr)
{
	unsigned long offset, size;

	if (!addr || !kallsyms_lookup_size_offset(addr, &size, &offset))
		return false;

	/* Get the entry address of the target function */
	addr -= offset;

	/*
	 * Since ftrace_location_range() does an inclusive range check, we
	 * need to subtract 1 byte from the end address.
	 */
	return !ftrace_location_range(addr, addr + size - 1);
}

static bool within_notrace_func(struct trace_kprobe *tk)
{
	unsigned long addr = trace_kprobe_address(tk);
	char symname[KSYM_NAME_LEN], *p;

	if (!__within_notrace_func(addr))
		return false;

	/*
	 * Check if the address is on a suffixed symbol, e.g. a
	 * compiler-generated clone such as foo.isra.1 or foo.constprop.2.
	 */
	if (!lookup_symbol_name(addr, symname)) {
		p = strchr(symname, '.');
		if (!p)
			return true;
		*p = '\0';
		addr = (unsigned long)kprobe_lookup_name(symname, 0);
		if (addr)
			return __within_notrace_func(addr);
	}

	return true;
}
#else
#define within_notrace_func(tk)	(false)
#endif

/* Internal register function - just handle k*probes and flags */
static int __register_trace_kprobe(struct trace_kprobe *tk)
{
	int i, ret;

	if (trace_probe_is_registered(&tk->tp))
		return -EINVAL;

	if (within_notrace_func(tk)) {
		pr_warn("Could not probe notrace function %s\n",
			trace_kprobe_symbol(tk));
		return -EINVAL;
	}

	for (i = 0; i < tk->tp.nr_args; i++)
		traceprobe_update_arg(&tk->tp.args[i]);

	/* Set/clear disabled flag according to tp->flags */
	if (trace_probe_is_enabled(&tk->tp))
		tk->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
	else
		tk->rp.kp.flags |= KPROBE_FLAG_DISABLED;

	if (trace_kprobe_is_return(tk))
		ret = register_kretprobe(&tk->rp);
	else
		ret = register_kprobe(&tk->rp.kp);

	if (ret == 0) {
		tk->tp.flags |= TP_FLAG_REGISTERED;
	} else if (ret == -EILSEQ) {
		pr_warn("Probing address(0x%p) is not an instruction boundary.\n",
			tk->rp.kp.addr);
		ret = -EINVAL;
	}
	return ret;
}

/* Internal unregister function - just handle k*probes and flags */
static void __unregister_trace_kprobe(struct trace_kprobe *tk)
{
	if (trace_probe_is_registered(&tk->tp)) {
		if (trace_kprobe_is_return(tk))
			unregister_kretprobe(&tk->rp);
		else
			unregister_kprobe(&tk->rp.kp);
		tk->tp.flags &= ~TP_FLAG_REGISTERED;
		/* Cleanup kprobe for reuse */
		if (tk->rp.kp.symbol_name)
			tk->rp.kp.addr = NULL;
	}
}

/* Unregister a trace_probe and probe_event: must be called with probe_lock held */
static int unregister_trace_kprobe(struct trace_kprobe *tk)
{
	/* An enabled event cannot be unregistered */
	if (trace_probe_is_enabled(&tk->tp))
		return -EBUSY;

	/* Will fail if probe is being used by ftrace or perf */
	if (unregister_kprobe_event(tk))
		return -EBUSY;

	__unregister_trace_kprobe(tk);
	list_del(&tk->list);

	return 0;
}

/* Register a trace_probe and probe_event */
static int register_trace_kprobe(struct trace_kprobe *tk)
{
	struct trace_kprobe *old_tk;
	int ret;

	mutex_lock(&probe_lock);

	/* Delete the old event if one with the same name exists */
	old_tk = find_trace_kprobe(trace_event_name(&tk->tp.call),
			tk->tp.call.class->system);
	if (old_tk) {
		ret = unregister_trace_kprobe(old_tk);
		if (ret < 0)
			goto end;
		free_trace_kprobe(old_tk);
	}

	/* Register new event */
	ret = register_kprobe_event(tk);
	if (ret) {
		pr_warn("Failed to register probe event(%d)\n", ret);
		goto end;
	}

	/* Register k*probe */
	ret = __register_trace_kprobe(tk);
	if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
		pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
		ret = 0;
	}

	if (ret < 0)
		unregister_kprobe_event(tk);
	else
		list_add_tail(&tk->list, &probe_list);

end:
	mutex_unlock(&probe_lock);
	return ret;
}

/* Module notifier callback, checking events on the coming module */
static int trace_kprobe_module_callback(struct notifier_block *nb,
				       unsigned long val, void *data)
{
	struct module *mod = data;
	struct trace_kprobe *tk;
	int ret;

	if (val != MODULE_STATE_COMING)
		return NOTIFY_DONE;

	/* Update probes on the coming module */
	mutex_lock(&probe_lock);
	list_for_each_entry(tk, &probe_list, list) {
		if (trace_kprobe_within_module(tk, mod)) {
			/* Don't need to check busy - this should have gone. */
			__unregister_trace_kprobe(tk);
			ret = __register_trace_kprobe(tk);
			if (ret)
				pr_warn("Failed to re-register probe %s on %s: %d\n",
					trace_event_name(&tk->tp.call),
					mod->name, ret);
		}
	}
	mutex_unlock(&probe_lock);

	return NOTIFY_DONE;
}

static struct notifier_block trace_kprobe_module_nb = {
	.notifier_call = trace_kprobe_module_callback,
	.priority = 1	/* Invoked after kprobe module callback */
};

/* Convert certain expected characters into '_' when generating event names */
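/* e.g. "r_ext4:ext4_sync_file_0" becomes "r_ext4_ext4_sync_file_0" */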
static inline void sanitize_event_name(char *name)
{
	while (*name++ != '\0')
		if (*name == ':' || *name == '.')
			*name = '_';
}

static int create_trace_kprobe(int argc, char **argv)
{
	/*
	 * Argument syntax:
	 *  - Add kprobe:
	 *      p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
	 *  - Add kretprobe:
	 *      r[MAXACTIVE][:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
	 * Fetch args:
	 *  $retval	: fetch return value
	 *  $stack	: fetch stack address
	 *  $stackN	: fetch Nth of stack (N:0-)
	 *  $comm	: fetch current task comm
	 *  @ADDR	: fetch memory at ADDR (ADDR should be in kernel)
	 *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
	 *  %REG	: fetch register REG
	 * Dereferencing memory fetch:
	 *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
	 * Alias name of args:
	 *  NAME=FETCHARG : set NAME as alias of FETCHARG.
	 * Type of args:
	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
	 */
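	/*
	 * Example use via tracefs (a sketch, assuming tracefs is mounted
	 * at /sys/kernel/debug/tracing; register names are arch-specific):
	 *
	 *   echo 'p:myprobe do_sys_open dfd=%ax filename=%dx' >> kprobe_events
	 *   echo 'r:myretprobe do_sys_open $retval' >> kprobe_events
	 *   echo '-:myprobe' >> kprobe_events
	 *
	 * See Documentation/trace/kprobetrace.rst for the full syntax.
	 */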
	struct trace_kprobe *tk;
	int i, ret = 0;
	bool is_return = false, is_delete = false;
	char *symbol = NULL, *event = NULL, *group = NULL;
	int maxactive = 0;
	char *arg;
	long offset = 0;
	void *addr = NULL;
	char buf[MAX_EVENT_NAME_LEN];

	/* argc must be >= 1 */
	if (argv[0][0] == 'p')
		is_return = false;
	else if (argv[0][0] == 'r')
		is_return = true;
	else if (argv[0][0] == '-')
		is_delete = true;
	else {
		pr_info("Probe definition must start with 'p', 'r' or '-'.\n");
		return -EINVAL;
	}

	event = strchr(&argv[0][1], ':');
	if (event) {
		event[0] = '\0';
		event++;
	}
	if (is_return && isdigit(argv[0][1])) {
		ret = kstrtouint(&argv[0][1], 0, &maxactive);
		if (ret) {
			pr_info("Failed to parse maxactive.\n");
			return ret;
		}
		/*
		 * kretprobe instances are iterated over via a list. The
		 * maximum should stay reasonable.
		 */
		if (maxactive > KRETPROBE_MAXACTIVE_MAX) {
			pr_info("Maxactive is too big (%d > %d).\n",
				maxactive, KRETPROBE_MAXACTIVE_MAX);
			return -E2BIG;
		}
	}

	if (event) {
		if (strchr(event, '/')) {
			group = event;
			event = strchr(group, '/') + 1;
			event[-1] = '\0';
			if (strlen(group) == 0) {
				pr_info("Group name is not specified\n");
				return -EINVAL;
			}
		}
		if (strlen(event) == 0) {
			pr_info("Event name is not specified\n");
			return -EINVAL;
		}
	}
	if (!group)
		group = KPROBE_EVENT_SYSTEM;

	if (is_delete) {
		if (!event) {
			pr_info("Delete command needs an event name.\n");
			return -EINVAL;
		}
		mutex_lock(&probe_lock);
		tk = find_trace_kprobe(event, group);
		if (!tk) {
			mutex_unlock(&probe_lock);
			pr_info("Event %s/%s doesn't exist.\n", group, event);
			return -ENOENT;
		}
		/* delete an event */
		ret = unregister_trace_kprobe(tk);
		if (ret == 0)
			free_trace_kprobe(tk);
		mutex_unlock(&probe_lock);
		return ret;
	}

	if (argc < 2) {
		pr_info("Probe point is not specified.\n");
		return -EINVAL;
	}

	/*
	 * Try to parse an address. If that fails, try to read the
	 * input as a symbol.
	 */
	if (kstrtoul(argv[1], 0, (unsigned long *)&addr)) {
		/* a symbol specified */
		symbol = argv[1];
		/* TODO: support .init module functions */
		ret = traceprobe_split_symbol_offset(symbol, &offset);
		if (ret || offset < 0 || offset > UINT_MAX) {
			pr_info("Failed to parse either an address or a symbol.\n");
			return ret;
		}
		if (offset && is_return &&
		    !kprobe_on_func_entry(NULL, symbol, offset)) {
			pr_info("Given offset is not valid for return probe.\n");
			return -EINVAL;
		}
	}
	argc -= 2; argv += 2;

	/* setup a probe */
	if (!event) {
		/* Make a new event name */
		if (symbol)
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
				 is_return ? 'r' : 'p', symbol, offset);
		else
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
				 is_return ? 'r' : 'p', addr);
		sanitize_event_name(buf);
		event = buf;
	}
	tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive,
			       argc, is_return);
	if (IS_ERR(tk)) {
		pr_info("Failed to allocate trace_probe.(%d)\n",
			(int)PTR_ERR(tk));
		return PTR_ERR(tk);
	}

	/* parse arguments */
	ret = 0;
	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
		struct probe_arg *parg = &tk->tp.args[i];

		/* Increment count for freeing args in error case */
		tk->tp.nr_args++;

		/* Parse argument name */
		arg = strchr(argv[i], '=');
		if (arg) {
			*arg++ = '\0';
			parg->name = kstrdup(argv[i], GFP_KERNEL);
		} else {
			arg = argv[i];
			/* If argument name is omitted, set "argN" */
			snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
			parg->name = kstrdup(buf, GFP_KERNEL);
		}

		if (!parg->name) {
			pr_info("Failed to allocate argument[%d] name.\n", i);
			ret = -ENOMEM;
			goto error;
		}

		if (!is_good_name(parg->name)) {
			pr_info("Invalid argument[%d] name: %s\n",
				i, parg->name);
			ret = -EINVAL;
			goto error;
		}

		if (traceprobe_conflict_field_name(parg->name,
							tk->tp.args, i)) {
			pr_info("Argument[%d] name '%s' conflicts with another field.\n",
				i, argv[i]);
			ret = -EINVAL;
			goto error;
		}

		/* Parse fetch argument */
		ret = traceprobe_parse_probe_arg(arg, &tk->tp.size, parg,
						is_return, true,
						kprobes_fetch_type_table);
		if (ret) {
			pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
			goto error;
		}
	}

	ret = register_trace_kprobe(tk);
	if (ret)
		goto error;
	return 0;

error:
	free_trace_kprobe(tk);
	return ret;
}

static int release_all_trace_kprobes(void)
{
	struct trace_kprobe *tk;
	int ret = 0;

	mutex_lock(&probe_lock);
	/* Ensure no probe is in use. */
	list_for_each_entry(tk, &probe_list, list)
		if (trace_probe_is_enabled(&tk->tp)) {
			ret = -EBUSY;
			goto end;
		}
	/* TODO: Use batch unregistration */
	while (!list_empty(&probe_list)) {
		tk = list_entry(probe_list.next, struct trace_kprobe, list);
		ret = unregister_trace_kprobe(tk);
		if (ret)
			goto end;
		free_trace_kprobe(tk);
	}

end:
	mutex_unlock(&probe_lock);

	return ret;
}

/* Probes listing interfaces */
static void *probes_seq_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&probe_lock);
	return seq_list_start(&probe_list, *pos);
}

static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	return seq_list_next(v, &probe_list, pos);
}

static void probes_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&probe_lock);
}

static int probes_seq_show(struct seq_file *m, void *v)
{
	struct trace_kprobe *tk = v;
	int i;

	seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p');
	if (trace_kprobe_is_return(tk) && tk->rp.maxactive)
		seq_printf(m, "%d", tk->rp.maxactive);
	seq_printf(m, ":%s/%s", tk->tp.call.class->system,
			trace_event_name(&tk->tp.call));

	if (!tk->symbol)
		seq_printf(m, " 0x%p", tk->rp.kp.addr);
	else if (tk->rp.kp.offset)
		seq_printf(m, " %s+%u", trace_kprobe_symbol(tk),
			   tk->rp.kp.offset);
	else
		seq_printf(m, " %s", trace_kprobe_symbol(tk));

	for (i = 0; i < tk->tp.nr_args; i++)
		seq_printf(m, " %s=%s", tk->tp.args[i].name, tk->tp.args[i].comm);
	seq_putc(m, '\n');

	return 0;
}

static const struct seq_operations probes_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_seq_show
};

static int probes_open(struct inode *inode, struct file *file)
{
	int ret;

	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
		ret = release_all_trace_kprobes();
		if (ret < 0)
			return ret;
	}

	return seq_open(file, &probes_seq_op);
}

static ssize_t probes_write(struct file *file, const char __user *buffer,
			    size_t count, loff_t *ppos)
{
	return trace_parse_run_command(file, buffer, count, ppos,
				       create_trace_kprobe);
}

static const struct file_operations kprobe_events_ops = {
	.owner          = THIS_MODULE,
	.open           = probes_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
	.write		= probes_write,
};

/* Probes profiling interfaces */
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
	struct trace_kprobe *tk = v;

	seq_printf(m, "  %-44s %15lu %15lu\n",
		   trace_event_name(&tk->tp.call),
		   trace_kprobe_nhit(tk),
		   tk->rp.kp.nmissed);

	return 0;
}

static const struct seq_operations profile_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_profile_seq_show
};

static int profile_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &profile_seq_op);
}

static const struct file_operations kprobe_profile_ops = {
	.owner          = THIS_MODULE,
	.open           = profile_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
};

/* Kprobe handler */
static nokprobe_inline void
__kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
		    struct trace_event_file *trace_file)
{
	struct kprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, dsize, pc;
	unsigned long irq_flags;
	struct trace_event_call *call = &tk->tp.call;

	WARN_ON(call != trace_file->event_call);

	if (trace_trigger_soft_disabled(trace_file))
		return;

	local_save_flags(irq_flags);
	pc = preempt_count();

	dsize = __get_data_size(&tk->tp, regs);
	size = sizeof(*entry) + tk->tp.size + dsize;

	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
						call->event.type,
						size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->ip = (unsigned long)tk->rp.kp.addr;
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);

	event_trigger_unlock_commit_regs(trace_file, buffer, event,
					 entry, irq_flags, pc, regs);
}

static void
kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)
{
	struct event_file_link *link;

	list_for_each_entry_rcu(link, &tk->tp.files, list)
		__kprobe_trace_func(tk, regs, link->file);
}
NOKPROBE_SYMBOL(kprobe_trace_func);

/* Kretprobe handler */
static nokprobe_inline void
__kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
		       struct pt_regs *regs,
		       struct trace_event_file *trace_file)
{
	struct kretprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, pc, dsize;
	unsigned long irq_flags;
	struct trace_event_call *call = &tk->tp.call;

	WARN_ON(call != trace_file->event_call);

	if (trace_trigger_soft_disabled(trace_file))
		return;

	local_save_flags(irq_flags);
	pc = preempt_count();

	dsize = __get_data_size(&tk->tp, regs);
	size = sizeof(*entry) + tk->tp.size + dsize;

	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
						call->event.type,
						size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->func = (unsigned long)tk->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);

	event_trigger_unlock_commit_regs(trace_file, buffer, event,
					 entry, irq_flags, pc, regs);
}

static void
kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
		     struct pt_regs *regs)
{
	struct event_file_link *link;

	list_for_each_entry_rcu(link, &tk->tp.files, list)
		__kretprobe_trace_func(tk, ri, regs, link->file);
}
NOKPROBE_SYMBOL(kretprobe_trace_func);

/* Event entry printers */
static enum print_line_t
print_kprobe_event(struct trace_iterator *iter, int flags,
		   struct trace_event *event)
{
	struct kprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));

	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
		goto out;

	trace_seq_putc(s, ')');

	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto out;

	trace_seq_putc(s, '\n');
 out:
	return trace_handle_return(s);
}

static enum print_line_t
print_kretprobe_event(struct trace_iterator *iter, int flags,
		      struct trace_event *event)
{
	struct kretprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kretprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));

	if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
		goto out;

	trace_seq_puts(s, " <- ");

	if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
		goto out;

	trace_seq_putc(s, ')');

	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto out;

	trace_seq_putc(s, '\n');

 out:
	return trace_handle_return(s);
}


static int kprobe_event_define_fields(struct trace_event_call *event_call)
{
	int ret, i;
	struct kprobe_trace_entry_head field;
	struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;

	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tk->tp.nr_args; i++) {
		struct probe_arg *parg = &tk->tp.args[i];

		ret = trace_define_field(event_call, parg->type->fmttype,
					 parg->name,
					 sizeof(field) + parg->offset,
					 parg->type->size,
					 parg->type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}

static int kretprobe_event_define_fields(struct trace_event_call *event_call)
{
	int ret, i;
	struct kretprobe_trace_entry_head field;
	struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;

	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tk->tp.nr_args; i++) {
		struct probe_arg *parg = &tk->tp.args[i];

		ret = trace_define_field(event_call, parg->type->fmttype,
					 parg->name,
					 sizeof(field) + parg->offset,
					 parg->type->size,
					 parg->type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}

#ifdef CONFIG_PERF_EVENTS

/* Kprobe profile handler */
static int
kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
{
	struct trace_event_call *call = &tk->tp.call;
	struct kprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	if (bpf_prog_array_valid(call)) {
		unsigned long orig_ip = instruction_pointer(regs);
		int ret;

		ret = trace_call_bpf(call, regs);

		/*
		 * We need to check and see if we modified the pc of the
		 * pt_regs, and if so return 1 so that we don't do the
		 * single stepping.
		 */
		if (orig_ip != instruction_pointer(regs))
			return 1;
		if (!ret)
			return 0;
	}

	head = this_cpu_ptr(call->perf_events);
	if (hlist_empty(head))
		return 0;

	dsize = __get_data_size(&tk->tp, regs);
	__size = sizeof(*entry) + tk->tp.size + dsize;
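	/*
	 * Round up so that the record plus perf's u32 size field stays
	 * u64-aligned.
	 */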
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	entry = perf_trace_buf_alloc(size, NULL, &rctx);
	if (!entry)
		return 0;

	entry->ip = (unsigned long)tk->rp.kp.addr;
	memset(&entry[1], 0, dsize);
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
			      head, NULL);
	return 0;
}
NOKPROBE_SYMBOL(kprobe_perf_func);

/* Kretprobe profile handler */
static void
kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
		    struct pt_regs *regs)
{
	struct trace_event_call *call = &tk->tp.call;
	struct kretprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
		return;

	head = this_cpu_ptr(call->perf_events);
	if (hlist_empty(head))
		return;

	dsize = __get_data_size(&tk->tp, regs);
	__size = sizeof(*entry) + tk->tp.size + dsize;
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	entry = perf_trace_buf_alloc(size, NULL, &rctx);
	if (!entry)
		return;

	entry->func = (unsigned long)tk->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
			      head, NULL);
}
NOKPROBE_SYMBOL(kretprobe_perf_func);

int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
			const char **symbol, u64 *probe_offset,
			u64 *probe_addr, bool perf_type_tracepoint)
{
	const char *pevent = trace_event_name(event->tp_event);
	const char *group = event->tp_event->class->system;
	struct trace_kprobe *tk;

	if (perf_type_tracepoint)
		tk = find_trace_kprobe(pevent, group);
	else
		tk = event->tp_event->data;
	if (!tk)
		return -EINVAL;

	*fd_type = trace_kprobe_is_return(tk) ? BPF_FD_TYPE_KRETPROBE
					      : BPF_FD_TYPE_KPROBE;
	if (tk->symbol) {
		*symbol = tk->symbol;
		*probe_offset = tk->rp.kp.offset;
		*probe_addr = 0;
	} else {
		*symbol = NULL;
		*probe_offset = 0;
		*probe_addr = (unsigned long)tk->rp.kp.addr;
	}
	return 0;
}
#endif	/* CONFIG_PERF_EVENTS */

/*
 * Called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
 *
 * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
 * lockless, but we can't race with this __init function.
 */
static int kprobe_register(struct trace_event_call *event,
			   enum trace_reg type, void *data)
{
	struct trace_kprobe *tk = (struct trace_kprobe *)event->data;
	struct trace_event_file *file = data;

	switch (type) {
	case TRACE_REG_REGISTER:
		return enable_trace_kprobe(tk, file);
	case TRACE_REG_UNREGISTER:
		return disable_trace_kprobe(tk, file);

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return enable_trace_kprobe(tk, NULL);
	case TRACE_REG_PERF_UNREGISTER:
		return disable_trace_kprobe(tk, NULL);
	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
		return 0;
#endif
	}
	return 0;
}

static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
{
	struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
	int ret = 0;

	raw_cpu_inc(*tk->nhit);

	if (tk->tp.flags & TP_FLAG_TRACE)
		kprobe_trace_func(tk, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tk->tp.flags & TP_FLAG_PROFILE)
		ret = kprobe_perf_func(tk, regs);
#endif
	return ret;
}
NOKPROBE_SYMBOL(kprobe_dispatcher);

static int
kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);

	raw_cpu_inc(*tk->nhit);

	if (tk->tp.flags & TP_FLAG_TRACE)
		kretprobe_trace_func(tk, ri, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tk->tp.flags & TP_FLAG_PROFILE)
		kretprobe_perf_func(tk, ri, regs);
#endif
	return 0;	/* We don't tweak the kernel, so just return 0 */
}
NOKPROBE_SYMBOL(kretprobe_dispatcher);

static struct trace_event_functions kretprobe_funcs = {
	.trace		= print_kretprobe_event
};

static struct trace_event_functions kprobe_funcs = {
	.trace		= print_kprobe_event
};

static inline void init_trace_event_call(struct trace_kprobe *tk,
					 struct trace_event_call *call)
{
	INIT_LIST_HEAD(&call->class->fields);
	if (trace_kprobe_is_return(tk)) {
		call->event.funcs = &kretprobe_funcs;
		call->class->define_fields = kretprobe_event_define_fields;
	} else {
		call->event.funcs = &kprobe_funcs;
		call->class->define_fields = kprobe_event_define_fields;
	}

	call->flags = TRACE_EVENT_FL_KPROBE;
	call->class->reg = kprobe_register;
	call->data = tk;
}

static int register_kprobe_event(struct trace_kprobe *tk)
{
	struct trace_event_call *call = &tk->tp.call;
	int ret = 0;

	init_trace_event_call(tk, call);

	if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
		return -ENOMEM;
	ret = register_trace_event(&call->event);
	if (!ret) {
		kfree(call->print_fmt);
		return -ENODEV;
	}
	ret = trace_add_event_call(call);
	if (ret) {
		pr_info("Failed to register kprobe event: %s\n",
			trace_event_name(call));
		kfree(call->print_fmt);
		unregister_trace_event(&call->event);
	}
	return ret;
}

static int unregister_kprobe_event(struct trace_kprobe *tk)
{
	int ret;

	/* tp->event is unregistered in trace_remove_event_call() */
	ret = trace_remove_event_call(&tk->tp.call);
	if (!ret)
		kfree(tk->tp.call.print_fmt);
	return ret;
}

#ifdef CONFIG_PERF_EVENTS
/* create a trace_kprobe, but don't add it to global lists */
struct trace_event_call *
create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
			  bool is_return)
{
	struct trace_kprobe *tk;
	int ret;
	char *event;

	/*
	 * local trace_kprobes are not added to probe_list, so they are never
	 * searched in find_trace_kprobe(). Therefore, there is no concern
	 * about duplicate names here.
	 */
	event = func ? func : "DUMMY_EVENT";

	tk = alloc_trace_kprobe(KPROBE_EVENT_SYSTEM, event, (void *)addr, func,
				offs, 0 /* maxactive */, 0 /* nargs */,
				is_return);

	if (IS_ERR(tk)) {
		pr_info("Failed to allocate trace_probe.(%d)\n",
			(int)PTR_ERR(tk));
		return ERR_CAST(tk);
	}

	init_trace_event_call(tk, &tk->tp.call);

	if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
		ret = -ENOMEM;
		goto error;
	}

	ret = __register_trace_kprobe(tk);
	if (ret < 0) {
		kfree(tk->tp.call.print_fmt);
		goto error;
	}

	return &tk->tp.call;
error:
	free_trace_kprobe(tk);
	return ERR_PTR(ret);
}

void destroy_local_trace_kprobe(struct trace_event_call *event_call)
{
	struct trace_kprobe *tk;

	tk = container_of(event_call, struct trace_kprobe, tp.call);

	if (trace_probe_is_enabled(&tk->tp)) {
		WARN_ON(1);
		return;
	}

	__unregister_trace_kprobe(tk);

	kfree(tk->tp.call.print_fmt);
	free_trace_kprobe(tk);
}
#endif /* CONFIG_PERF_EVENTS */

/* Make a tracefs interface for controlling probe points */
static __init int init_kprobe_trace(void)
{
	struct dentry *d_tracer;
	struct dentry *entry;

	if (register_module_notifier(&trace_kprobe_module_nb))
		return -EINVAL;

	d_tracer = tracing_init_dentry();
	if (IS_ERR(d_tracer))
		return 0;

	entry = tracefs_create_file("kprobe_events", 0644, d_tracer,
				    NULL, &kprobe_events_ops);

	/* Event list interface */
	if (!entry)
		pr_warn("Could not create tracefs 'kprobe_events' entry\n");

	/* Profile interface */
	entry = tracefs_create_file("kprobe_profile", 0444, d_tracer,
				    NULL, &kprobe_profile_ops);

	if (!entry)
		pr_warn("Could not create tracefs 'kprobe_profile' entry\n");
	return 0;
}
fs_initcall(init_kprobe_trace);


#ifdef CONFIG_FTRACE_STARTUP_TEST
static __init struct trace_event_file *
find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr)
{
	struct trace_event_file *file;

	list_for_each_entry(file, &tr->events, list)
		if (file->event_call == &tk->tp.call)
			return file;

	return NULL;
}

/*
 * Nobody but us can call enable_trace_kprobe/disable_trace_kprobe at this
 * stage, so we can do this lockless.
 */
static __init int kprobe_trace_self_tests_init(void)
{
	int ret, warn = 0;
	int (*target)(int, int, int, int, int, int);
	struct trace_kprobe *tk;
	struct trace_event_file *file;

	if (tracing_is_disabled())
		return -ENODEV;

	target = kprobe_trace_selftest_target;

	pr_info("Testing kprobe tracing: ");

	ret = trace_run_command("p:testprobe kprobe_trace_selftest_target "
				"$stack $stack0 +0($stack)",
				create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on probing function entry.\n");
		warn++;
	} else {
		/* Enable trace point */
		tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tk == NULL)) {
			pr_warn("error on getting new probe.\n");
			warn++;
		} else {
			file = find_trace_probe_file(tk, top_trace_array());
			if (WARN_ON_ONCE(file == NULL)) {
				pr_warn("error on getting probe file.\n");
				warn++;
			} else
				enable_trace_kprobe(tk, file);
		}
	}

	ret = trace_run_command("r:testprobe2 kprobe_trace_selftest_target "
				"$retval", create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on probing function return.\n");
		warn++;
	} else {
		/* Enable trace point */
		tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tk == NULL)) {
			pr_warn("error on getting 2nd new probe.\n");
			warn++;
		} else {
			file = find_trace_probe_file(tk, top_trace_array());
			if (WARN_ON_ONCE(file == NULL)) {
				pr_warn("error on getting probe file.\n");
				warn++;
			} else
				enable_trace_kprobe(tk, file);
		}
	}

	if (warn)
		goto end;

	ret = target(1, 2, 3, 4, 5, 6);

	/*
	 * Not expecting an error here, the check is only to prevent the
	 * optimizer from removing the call to target() as otherwise there
	 * are no side-effects and the call is never performed.
	 */
	if (ret != 21)
		warn++;

	/* Disable trace points before removing them */
	tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
	if (WARN_ON_ONCE(tk == NULL)) {
		pr_warn("error on getting test probe.\n");
		warn++;
	} else {
		if (trace_kprobe_nhit(tk) != 1) {
			pr_warn("incorrect number of testprobe hits\n");
			warn++;
		}

		file = find_trace_probe_file(tk, top_trace_array());
		if (WARN_ON_ONCE(file == NULL)) {
			pr_warn("error on getting probe file.\n");
			warn++;
		} else
			disable_trace_kprobe(tk, file);
	}

	tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
	if (WARN_ON_ONCE(tk == NULL)) {
		pr_warn("error on getting 2nd test probe.\n");
		warn++;
	} else {
		if (trace_kprobe_nhit(tk) != 1) {
			pr_warn("incorrect number of testprobe2 hits\n");
			warn++;
		}

		file = find_trace_probe_file(tk, top_trace_array());
		if (WARN_ON_ONCE(file == NULL)) {
			pr_warn("error on getting probe file.\n");
			warn++;
		} else
			disable_trace_kprobe(tk, file);
	}

	ret = trace_run_command("-:testprobe", create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on deleting a probe.\n");
		warn++;
	}

	ret = trace_run_command("-:testprobe2", create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on deleting a probe.\n");
		warn++;
	}

end:
	release_all_trace_kprobes();
	/*
	 * Wait for the optimizer work to finish. Otherwise it might fiddle
	 * with probes in already freed __init text.
	 */
	wait_for_kprobe_optimizer();
	if (warn)
		pr_cont("NG: Some tests failed. Please check them.\n");
	else
		pr_cont("OK\n");
	return 0;
}

late_initcall(kprobe_trace_self_tests_init);

#endif