1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Kprobes-based tracing events
4 *
5 * Created by Masami Hiramatsu <mhiramat@redhat.com>
6 *
7 */
8 #define pr_fmt(fmt) "trace_kprobe: " fmt
9
10 #include <linux/module.h>
11 #include <linux/uaccess.h>
12 #include <linux/rculist.h>
13 #include <linux/error-injection.h>
14
15 #include "trace_kprobe_selftest.h"
16 #include "trace_probe.h"
17
18 #define KPROBE_EVENT_SYSTEM "kprobes"
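/*
 * Upper bound for a user-supplied maxactive (the number of kretprobe
 * instances that may be tracked concurrently); see the "r[MAXACTIVE]"
 * syntax handled in create_trace_kprobe() below.
 */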
19 #define KRETPROBE_MAXACTIVE_MAX 4096
20
/*
 * Kprobe event core functions
 */
24 struct trace_kprobe {
25 struct list_head list;
26 struct kretprobe rp; /* Use rp.kp for kprobe use */
27 unsigned long __percpu *nhit;
28 const char *symbol; /* symbol name */
29 struct trace_probe tp;
30 };
31
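/*
 * Total allocation size of a trace_kprobe with n probe arguments: the
 * embedded trace_probe ends with a flexible array of struct probe_arg,
 * so size the allocation up to tp.args[n].
 */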
32 #define SIZEOF_TRACE_KPROBE(n) \
33 (offsetof(struct trace_kprobe, tp.args) + \
34 (sizeof(struct probe_arg) * (n)))
35
static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
37 {
38 return tk->rp.handler != NULL;
39 }
40
static nokprobe_inline const char *trace_kprobe_symbol(struct trace_kprobe *tk)
42 {
43 return tk->symbol ? tk->symbol : "unknown";
44 }
45
static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk)
47 {
48 return tk->rp.kp.offset;
49 }
50
static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
52 {
53 return !!(kprobe_gone(&tk->rp.kp));
54 }
55
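/*
 * A probe on a module symbol is specified as "MOD:SYM" (see the "[MOD:]KSYM"
 * syntax), so a symbol starting with "<mod->name>:" belongs to that module.
 */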
static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
57 struct module *mod)
58 {
59 int len = strlen(mod->name);
60 const char *name = trace_kprobe_symbol(tk);
61 return strncmp(mod->name, name, len) == 0 && name[len] == ':';
62 }
63
static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
65 {
66 char *p;
67 bool ret;
68
69 if (!tk->symbol)
70 return false;
71 p = strchr(tk->symbol, ':');
72 if (!p)
73 return true;
74 *p = '\0';
75 mutex_lock(&module_mutex);
76 ret = !!find_module(tk->symbol);
77 mutex_unlock(&module_mutex);
78 *p = ':';
79
80 return ret;
81 }
82
static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
84 {
85 unsigned long nhit = 0;
86 int cpu;
87
88 for_each_possible_cpu(cpu)
89 nhit += *per_cpu_ptr(tk->nhit, cpu);
90
91 return nhit;
92 }
93
94 /* Return 0 if it fails to find the symbol address */
95 static nokprobe_inline
unsigned long trace_kprobe_address(struct trace_kprobe *tk)
97 {
98 unsigned long addr;
99
100 if (tk->symbol) {
101 addr = (unsigned long)
102 kallsyms_lookup_name(trace_kprobe_symbol(tk));
103 if (addr)
104 addr += tk->rp.kp.offset;
105 } else {
106 addr = (unsigned long)tk->rp.kp.addr;
107 }
108 return addr;
109 }
110
bool trace_kprobe_on_func_entry(struct trace_event_call *call)
112 {
113 struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
114
115 return kprobe_on_func_entry(tk->rp.kp.addr,
116 tk->rp.kp.addr ? NULL : tk->rp.kp.symbol_name,
117 tk->rp.kp.addr ? 0 : tk->rp.kp.offset);
118 }
119
bool trace_kprobe_error_injectable(struct trace_event_call *call)
121 {
122 struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
123
124 return within_error_injection_list(trace_kprobe_address(tk));
125 }
126
127 static int register_kprobe_event(struct trace_kprobe *tk);
128 static int unregister_kprobe_event(struct trace_kprobe *tk);
129
130 static DEFINE_MUTEX(probe_lock);
131 static LIST_HEAD(probe_list);
132
133 static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
134 static int kretprobe_dispatcher(struct kretprobe_instance *ri,
135 struct pt_regs *regs);
136
137 /* Memory fetching by symbol */
138 struct symbol_cache {
139 char *symbol;
140 long offset;
141 unsigned long addr;
142 };
143
unsigned long update_symbol_cache(struct symbol_cache *sc)
145 {
146 sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
147
148 if (sc->addr)
149 sc->addr += sc->offset;
150
151 return sc->addr;
152 }
153
void free_symbol_cache(struct symbol_cache *sc)
155 {
156 kfree(sc->symbol);
157 kfree(sc);
158 }
159
struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
161 {
162 struct symbol_cache *sc;
163
164 if (!sym || strlen(sym) == 0)
165 return NULL;
166
167 sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
168 if (!sc)
169 return NULL;
170
171 sc->symbol = kstrdup(sym, GFP_KERNEL);
172 if (!sc->symbol) {
173 kfree(sc);
174 return NULL;
175 }
176 sc->offset = offset;
177 update_symbol_cache(sc);
178
179 return sc;
180 }
181
182 /*
183 * Kprobes-specific fetch functions
184 */
185 #define DEFINE_FETCH_stack(type) \
186 static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs, \
187 void *offset, void *dest) \
188 { \
189 *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \
190 (unsigned int)((unsigned long)offset)); \
191 } \
192 NOKPROBE_SYMBOL(FETCH_FUNC_NAME(stack, type));
193
194 DEFINE_BASIC_FETCH_FUNCS(stack)
195 /* No string on the stack entry */
196 #define fetch_stack_string NULL
197 #define fetch_stack_string_size NULL
198
199 #define DEFINE_FETCH_memory(type) \
200 static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs, \
201 void *addr, void *dest) \
202 { \
203 type retval; \
204 if (probe_kernel_address(addr, retval)) \
205 *(type *)dest = 0; \
206 else \
207 *(type *)dest = retval; \
208 } \
209 NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, type));
210
DEFINE_BASIC_FETCH_FUNCS(memory)
212 /*
213 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
214 * length and relative data location.
215 */
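/*
 * Note: the u32 data location packs the fetched string length in its upper
 * 16 bits and the buffer-relative offset in its lower 16 bits (this assumes
 * the make_data_rloc()/get_rloc_len()/get_rloc_offs() encoding from
 * trace_probe.h).
 */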
216 static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
217 void *addr, void *dest)
218 {
219 int maxlen = get_rloc_len(*(u32 *)dest);
220 u8 *dst = get_rloc_data(dest);
221 long ret;
222
223 if (!maxlen)
224 return;
225
226 /*
227 * Try to get string again, since the string can be changed while
228 * probing.
229 */
230 ret = strncpy_from_unsafe(dst, addr, maxlen);
231
232 if (ret < 0) { /* Failed to fetch string */
233 dst[0] = '\0';
234 *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
235 } else {
236 *(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest));
237 }
238 }
239 NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string));
240
/* Return the length of the string, including the terminating NUL byte */
static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
243 void *addr, void *dest)
244 {
245 mm_segment_t old_fs;
246 int ret, len = 0;
247 u8 c;
248
249 old_fs = get_fs();
250 set_fs(KERNEL_DS);
251 pagefault_disable();
252
253 do {
254 ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
255 len++;
256 } while (c && ret == 0 && len < MAX_STRING_SIZE);
257
258 pagefault_enable();
259 set_fs(old_fs);
260
261 if (ret < 0) /* Failed to check the length */
262 *(u32 *)dest = 0;
263 else
264 *(u32 *)dest = len;
265 }
266 NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string_size));
267
268 #define DEFINE_FETCH_symbol(type) \
269 void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, void *data, void *dest)\
270 { \
271 struct symbol_cache *sc = data; \
272 if (sc->addr) \
273 fetch_memory_##type(regs, (void *)sc->addr, dest); \
274 else \
275 *(type *)dest = 0; \
276 } \
277 NOKPROBE_SYMBOL(FETCH_FUNC_NAME(symbol, type));
278
279 DEFINE_BASIC_FETCH_FUNCS(symbol)
280 DEFINE_FETCH_symbol(string)
281 DEFINE_FETCH_symbol(string_size)
282
283 /* kprobes don't support file_offset fetch methods */
284 #define fetch_file_offset_u8 NULL
285 #define fetch_file_offset_u16 NULL
286 #define fetch_file_offset_u32 NULL
287 #define fetch_file_offset_u64 NULL
288 #define fetch_file_offset_string NULL
289 #define fetch_file_offset_string_size NULL
290
291 /* Fetch type information table */
292 static const struct fetch_type kprobes_fetch_type_table[] = {
293 /* Special types */
294 [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
295 sizeof(u32), 1, "__data_loc char[]"),
296 [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
297 string_size, sizeof(u32), 0, "u32"),
298 /* Basic types */
299 ASSIGN_FETCH_TYPE(u8, u8, 0),
300 ASSIGN_FETCH_TYPE(u16, u16, 0),
301 ASSIGN_FETCH_TYPE(u32, u32, 0),
302 ASSIGN_FETCH_TYPE(u64, u64, 0),
303 ASSIGN_FETCH_TYPE(s8, u8, 1),
304 ASSIGN_FETCH_TYPE(s16, u16, 1),
305 ASSIGN_FETCH_TYPE(s32, u32, 1),
306 ASSIGN_FETCH_TYPE(s64, u64, 1),
307 ASSIGN_FETCH_TYPE_ALIAS(x8, u8, u8, 0),
308 ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0),
309 ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0),
310 ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0),
311
312 ASSIGN_FETCH_TYPE_END
313 };
314
315 /*
316 * Allocate new trace_probe and initialize it (including kprobes).
317 */
static struct trace_kprobe *alloc_trace_kprobe(const char *group,
319 const char *event,
320 void *addr,
321 const char *symbol,
322 unsigned long offs,
323 int maxactive,
324 int nargs, bool is_return)
325 {
326 struct trace_kprobe *tk;
327 int ret = -ENOMEM;
328
329 tk = kzalloc(SIZEOF_TRACE_KPROBE(nargs), GFP_KERNEL);
330 if (!tk)
331 return ERR_PTR(ret);
332
333 tk->nhit = alloc_percpu(unsigned long);
334 if (!tk->nhit)
335 goto error;
336
337 if (symbol) {
338 tk->symbol = kstrdup(symbol, GFP_KERNEL);
339 if (!tk->symbol)
340 goto error;
341 tk->rp.kp.symbol_name = tk->symbol;
342 tk->rp.kp.offset = offs;
343 } else
344 tk->rp.kp.addr = addr;
345
346 if (is_return)
347 tk->rp.handler = kretprobe_dispatcher;
348 else
349 tk->rp.kp.pre_handler = kprobe_dispatcher;
350
351 tk->rp.maxactive = maxactive;
352
353 if (!event || !is_good_name(event)) {
354 ret = -EINVAL;
355 goto error;
356 }
357
358 tk->tp.call.class = &tk->tp.class;
359 tk->tp.call.name = kstrdup(event, GFP_KERNEL);
360 if (!tk->tp.call.name)
361 goto error;
362
363 if (!group || !is_good_name(group)) {
364 ret = -EINVAL;
365 goto error;
366 }
367
368 tk->tp.class.system = kstrdup(group, GFP_KERNEL);
369 if (!tk->tp.class.system)
370 goto error;
371
372 INIT_LIST_HEAD(&tk->list);
373 INIT_LIST_HEAD(&tk->tp.files);
374 return tk;
375 error:
376 kfree(tk->tp.call.name);
377 kfree(tk->symbol);
378 free_percpu(tk->nhit);
379 kfree(tk);
380 return ERR_PTR(ret);
381 }
382
static void free_trace_kprobe(struct trace_kprobe *tk)
384 {
385 int i;
386
387 for (i = 0; i < tk->tp.nr_args; i++)
388 traceprobe_free_probe_arg(&tk->tp.args[i]);
389
390 kfree(tk->tp.call.class->system);
391 kfree(tk->tp.call.name);
392 kfree(tk->symbol);
393 free_percpu(tk->nhit);
394 kfree(tk);
395 }
396
static struct trace_kprobe *find_trace_kprobe(const char *event,
398 const char *group)
399 {
400 struct trace_kprobe *tk;
401
402 list_for_each_entry(tk, &probe_list, list)
403 if (strcmp(trace_event_name(&tk->tp.call), event) == 0 &&
404 strcmp(tk->tp.call.class->system, group) == 0)
405 return tk;
406 return NULL;
407 }
408
static inline int __enable_trace_kprobe(struct trace_kprobe *tk)
410 {
411 int ret = 0;
412
413 if (trace_probe_is_registered(&tk->tp) && !trace_kprobe_has_gone(tk)) {
414 if (trace_kprobe_is_return(tk))
415 ret = enable_kretprobe(&tk->rp);
416 else
417 ret = enable_kprobe(&tk->rp.kp);
418 }
419
420 return ret;
421 }
422
/*
 * Enable trace_probe.
 * If the file is NULL, enable the "perf" handler; otherwise enable the
 * "trace" handler.
 */
427 static int
enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
429 {
430 struct event_file_link *link;
431 int ret = 0;
432
433 if (file) {
434 link = kmalloc(sizeof(*link), GFP_KERNEL);
435 if (!link) {
436 ret = -ENOMEM;
437 goto out;
438 }
439
440 link->file = file;
441 list_add_tail_rcu(&link->list, &tk->tp.files);
442
443 tk->tp.flags |= TP_FLAG_TRACE;
444 ret = __enable_trace_kprobe(tk);
445 if (ret) {
446 list_del_rcu(&link->list);
447 kfree(link);
448 tk->tp.flags &= ~TP_FLAG_TRACE;
449 }
450
451 } else {
452 tk->tp.flags |= TP_FLAG_PROFILE;
453 ret = __enable_trace_kprobe(tk);
454 if (ret)
455 tk->tp.flags &= ~TP_FLAG_PROFILE;
456 }
457 out:
458 return ret;
459 }
460
/*
 * Disable trace_probe.
 * If the file is NULL, disable the "perf" handler; otherwise disable the
 * "trace" handler.
 */
465 static int
disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
467 {
468 struct event_file_link *link = NULL;
469 int wait = 0;
470 int ret = 0;
471
472 if (file) {
473 link = find_event_file_link(&tk->tp, file);
474 if (!link) {
475 ret = -EINVAL;
476 goto out;
477 }
478
479 list_del_rcu(&link->list);
480 wait = 1;
481 if (!list_empty(&tk->tp.files))
482 goto out;
483
484 tk->tp.flags &= ~TP_FLAG_TRACE;
485 } else
486 tk->tp.flags &= ~TP_FLAG_PROFILE;
487
488 if (!trace_probe_is_enabled(&tk->tp) && trace_probe_is_registered(&tk->tp)) {
489 if (trace_kprobe_is_return(tk))
490 disable_kretprobe(&tk->rp);
491 else
492 disable_kprobe(&tk->rp.kp);
493 wait = 1;
494 }
495
/*
 * If tk is not added to any list, it must be a local trace_kprobe
 * created with perf_event_open. We don't need to wait for these
 * trace_kprobes.
 */
501 if (list_empty(&tk->list))
502 wait = 0;
503 out:
504 if (wait) {
505 /*
506 * Synchronize with kprobe_trace_func/kretprobe_trace_func
507 * to ensure disabled (all running handlers are finished).
508 * This is not only for kfree(), but also the caller,
509 * trace_remove_event_call() supposes it for releasing
510 * event_call related objects, which will be accessed in
511 * the kprobe_trace_func/kretprobe_trace_func.
512 */
513 synchronize_sched();
514 kfree(link); /* Ignored if link == NULL */
515 }
516
517 return ret;
518 }
519
520 #if defined(CONFIG_KPROBES_ON_FTRACE) && \
521 !defined(CONFIG_KPROBE_EVENTS_ON_NOTRACE)
static bool __within_notrace_func(unsigned long addr)
523 {
524 unsigned long offset, size;
525
526 if (!addr || !kallsyms_lookup_size_offset(addr, &size, &offset))
527 return false;
528
529 /* Get the entry address of the target function */
530 addr -= offset;
531
532 /*
533 * Since ftrace_location_range() does inclusive range check, we need
534 * to subtract 1 byte from the end address.
535 */
536 return !ftrace_location_range(addr, addr + size - 1);
537 }
538
static bool within_notrace_func(struct trace_kprobe *tk)
540 {
541 unsigned long addr = trace_kprobe_address(tk);
542 char symname[KSYM_NAME_LEN], *p;
543
544 if (!__within_notrace_func(addr))
545 return false;
546
547 /* Check if the address is on a suffixed-symbol */
548 if (!lookup_symbol_name(addr, symname)) {
549 p = strchr(symname, '.');
550 if (!p)
551 return true;
552 *p = '\0';
553 addr = (unsigned long)kprobe_lookup_name(symname, 0);
554 if (addr)
555 return __within_notrace_func(addr);
556 }
557
558 return true;
559 }
560 #else
561 #define within_notrace_func(tk) (false)
562 #endif
563
564 /* Internal register function - just handle k*probes and flags */
static int __register_trace_kprobe(struct trace_kprobe *tk)
566 {
567 int i, ret;
568
569 if (trace_probe_is_registered(&tk->tp))
570 return -EINVAL;
571
572 if (within_notrace_func(tk)) {
573 pr_warn("Could not probe notrace function %s\n",
574 trace_kprobe_symbol(tk));
575 return -EINVAL;
576 }
577
578 for (i = 0; i < tk->tp.nr_args; i++)
579 traceprobe_update_arg(&tk->tp.args[i]);
580
/* Set/clear disabled flag according to tp->flags */
582 if (trace_probe_is_enabled(&tk->tp))
583 tk->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
584 else
585 tk->rp.kp.flags |= KPROBE_FLAG_DISABLED;
586
587 if (trace_kprobe_is_return(tk))
588 ret = register_kretprobe(&tk->rp);
589 else
590 ret = register_kprobe(&tk->rp.kp);
591
592 if (ret == 0) {
593 tk->tp.flags |= TP_FLAG_REGISTERED;
594 } else if (ret == -EILSEQ) {
595 pr_warn("Probing address(0x%p) is not an instruction boundary.\n",
596 tk->rp.kp.addr);
597 ret = -EINVAL;
598 }
599 return ret;
600 }
601
602 /* Internal unregister function - just handle k*probes and flags */
static void __unregister_trace_kprobe(struct trace_kprobe *tk)
604 {
605 if (trace_probe_is_registered(&tk->tp)) {
606 if (trace_kprobe_is_return(tk))
607 unregister_kretprobe(&tk->rp);
608 else
609 unregister_kprobe(&tk->rp.kp);
610 tk->tp.flags &= ~TP_FLAG_REGISTERED;
611 /* Cleanup kprobe for reuse */
612 if (tk->rp.kp.symbol_name)
613 tk->rp.kp.addr = NULL;
614 }
615 }
616
/* Unregister a trace_probe and probe_event: call with probe_lock held */
static int unregister_trace_kprobe(struct trace_kprobe *tk)
619 {
620 /* Enabled event can not be unregistered */
621 if (trace_probe_is_enabled(&tk->tp))
622 return -EBUSY;
623
624 /* Will fail if probe is being used by ftrace or perf */
625 if (unregister_kprobe_event(tk))
626 return -EBUSY;
627
628 __unregister_trace_kprobe(tk);
629 list_del(&tk->list);
630
631 return 0;
632 }
633
634 /* Register a trace_probe and probe_event */
static int register_trace_kprobe(struct trace_kprobe *tk)
636 {
637 struct trace_kprobe *old_tk;
638 int ret;
639
640 mutex_lock(&probe_lock);
641
/* Delete old (same name) event if it exists */
643 old_tk = find_trace_kprobe(trace_event_name(&tk->tp.call),
644 tk->tp.call.class->system);
645 if (old_tk) {
646 ret = unregister_trace_kprobe(old_tk);
647 if (ret < 0)
648 goto end;
649 free_trace_kprobe(old_tk);
650 }
651
652 /* Register new event */
653 ret = register_kprobe_event(tk);
654 if (ret) {
655 pr_warn("Failed to register probe event(%d)\n", ret);
656 goto end;
657 }
658
659 /* Register k*probe */
660 ret = __register_trace_kprobe(tk);
661 if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
662 pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
663 ret = 0;
664 }
665
666 if (ret < 0)
667 unregister_kprobe_event(tk);
668 else
669 list_add_tail(&tk->list, &probe_list);
670
671 end:
672 mutex_unlock(&probe_lock);
673 return ret;
674 }
675
676 /* Module notifier call back, checking event on the module */
static int trace_kprobe_module_callback(struct notifier_block *nb,
678 unsigned long val, void *data)
679 {
680 struct module *mod = data;
681 struct trace_kprobe *tk;
682 int ret;
683
684 if (val != MODULE_STATE_COMING)
685 return NOTIFY_DONE;
686
687 /* Update probes on coming module */
688 mutex_lock(&probe_lock);
689 list_for_each_entry(tk, &probe_list, list) {
690 if (trace_kprobe_within_module(tk, mod)) {
691 /* Don't need to check busy - this should have gone. */
692 __unregister_trace_kprobe(tk);
693 ret = __register_trace_kprobe(tk);
694 if (ret)
695 pr_warn("Failed to re-register probe %s on %s: %d\n",
696 trace_event_name(&tk->tp.call),
697 mod->name, ret);
698 }
699 }
700 mutex_unlock(&probe_lock);
701
702 return NOTIFY_DONE;
703 }
704
705 static struct notifier_block trace_kprobe_module_nb = {
706 .notifier_call = trace_kprobe_module_callback,
707 .priority = 1 /* Invoked after kprobe module callback */
708 };
709
710 /* Convert certain expected symbols into '_' when generating event names */
static inline void sanitize_event_name(char *name)
712 {
713 while (*name++ != '\0')
714 if (*name == ':' || *name == '.')
715 *name = '_';
716 }
717
static int create_trace_kprobe(int argc, char **argv)
719 {
720 /*
721 * Argument syntax:
722 * - Add kprobe:
723 * p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
724 * - Add kretprobe:
725 * r[MAXACTIVE][:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
726 * Fetch args:
727 * $retval : fetch return value
728 * $stack : fetch stack address
729 * $stackN : fetch Nth of stack (N:0-)
730 * $comm : fetch current task comm
731 * @ADDR : fetch memory at ADDR (ADDR should be in kernel)
732 * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
733 * %REG : fetch register REG
734 * Dereferencing memory fetch:
735 * +|-offs(ARG) : fetch memory at ARG +|- offs address.
736 * Alias name of args:
737 * NAME=FETCHARG : set NAME as alias of FETCHARG.
738 * Type of args:
739 * FETCHARG:TYPE : use TYPE instead of unsigned long.
740 */
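/*
 * Illustrative examples (see Documentation/trace/kprobetrace.rst; the probed
 * symbols are only examples and may not exist on every kernel):
 *   p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack)
 *   r:myretprobe do_sys_open $retval
 */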
741 struct trace_kprobe *tk;
742 int i, ret = 0;
743 bool is_return = false, is_delete = false;
744 char *symbol = NULL, *event = NULL, *group = NULL;
745 int maxactive = 0;
746 char *arg;
747 long offset = 0;
748 void *addr = NULL;
749 char buf[MAX_EVENT_NAME_LEN];
750
751 /* argc must be >= 1 */
752 if (argv[0][0] == 'p')
753 is_return = false;
754 else if (argv[0][0] == 'r')
755 is_return = true;
756 else if (argv[0][0] == '-')
757 is_delete = true;
758 else {
759 pr_info("Probe definition must be started with 'p', 'r' or"
760 " '-'.\n");
761 return -EINVAL;
762 }
763
764 event = strchr(&argv[0][1], ':');
765 if (event) {
766 event[0] = '\0';
767 event++;
768 }
769 if (is_return && isdigit(argv[0][1])) {
770 ret = kstrtouint(&argv[0][1], 0, &maxactive);
771 if (ret) {
772 pr_info("Failed to parse maxactive.\n");
773 return ret;
774 }
/*
 * kretprobe instances are iterated over via a list. The
 * maximum should stay reasonable.
 */
778 if (maxactive > KRETPROBE_MAXACTIVE_MAX) {
779 pr_info("Maxactive is too big (%d > %d).\n",
780 maxactive, KRETPROBE_MAXACTIVE_MAX);
781 return -E2BIG;
782 }
783 }
784
785 if (event) {
786 if (strchr(event, '/')) {
787 group = event;
788 event = strchr(group, '/') + 1;
789 event[-1] = '\0';
790 if (strlen(group) == 0) {
791 pr_info("Group name is not specified\n");
792 return -EINVAL;
793 }
794 }
795 if (strlen(event) == 0) {
796 pr_info("Event name is not specified\n");
797 return -EINVAL;
798 }
799 }
800 if (!group)
801 group = KPROBE_EVENT_SYSTEM;
802
803 if (is_delete) {
804 if (!event) {
805 pr_info("Delete command needs an event name.\n");
806 return -EINVAL;
807 }
808 mutex_lock(&probe_lock);
809 tk = find_trace_kprobe(event, group);
810 if (!tk) {
811 mutex_unlock(&probe_lock);
812 pr_info("Event %s/%s doesn't exist.\n", group, event);
813 return -ENOENT;
814 }
815 /* delete an event */
816 ret = unregister_trace_kprobe(tk);
817 if (ret == 0)
818 free_trace_kprobe(tk);
819 mutex_unlock(&probe_lock);
820 return ret;
821 }
822
823 if (argc < 2) {
824 pr_info("Probe point is not specified.\n");
825 return -EINVAL;
826 }
827
/* Try to parse an address. If that fails, try to read the
 * input as a symbol. */
830 if (kstrtoul(argv[1], 0, (unsigned long *)&addr)) {
831 /* a symbol specified */
832 symbol = argv[1];
833 /* TODO: support .init module functions */
834 ret = traceprobe_split_symbol_offset(symbol, &offset);
835 if (ret || offset < 0 || offset > UINT_MAX) {
836 pr_info("Failed to parse either an address or a symbol.\n");
837 return ret;
838 }
839 if (offset && is_return &&
840 !kprobe_on_func_entry(NULL, symbol, offset)) {
841 pr_info("Given offset is not valid for return probe.\n");
842 return -EINVAL;
843 }
844 }
845 argc -= 2; argv += 2;
846
847 /* setup a probe */
848 if (!event) {
849 /* Make a new event name */
850 if (symbol)
851 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
852 is_return ? 'r' : 'p', symbol, offset);
853 else
854 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
855 is_return ? 'r' : 'p', addr);
856 sanitize_event_name(buf);
857 event = buf;
858 }
859 tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive,
860 argc, is_return);
861 if (IS_ERR(tk)) {
862 pr_info("Failed to allocate trace_probe.(%d)\n",
863 (int)PTR_ERR(tk));
864 return PTR_ERR(tk);
865 }
866
867 /* parse arguments */
868 ret = 0;
869 for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
870 struct probe_arg *parg = &tk->tp.args[i];
871
872 /* Increment count for freeing args in error case */
873 tk->tp.nr_args++;
874
875 /* Parse argument name */
876 arg = strchr(argv[i], '=');
877 if (arg) {
878 *arg++ = '\0';
879 parg->name = kstrdup(argv[i], GFP_KERNEL);
880 } else {
881 arg = argv[i];
882 /* If argument name is omitted, set "argN" */
883 snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
884 parg->name = kstrdup(buf, GFP_KERNEL);
885 }
886
887 if (!parg->name) {
888 pr_info("Failed to allocate argument[%d] name.\n", i);
889 ret = -ENOMEM;
890 goto error;
891 }
892
893 if (!is_good_name(parg->name)) {
894 pr_info("Invalid argument[%d] name: %s\n",
895 i, parg->name);
896 ret = -EINVAL;
897 goto error;
898 }
899
900 if (traceprobe_conflict_field_name(parg->name,
901 tk->tp.args, i)) {
902 pr_info("Argument[%d] name '%s' conflicts with "
903 "another field.\n", i, argv[i]);
904 ret = -EINVAL;
905 goto error;
906 }
907
908 /* Parse fetch argument */
909 ret = traceprobe_parse_probe_arg(arg, &tk->tp.size, parg,
910 is_return, true,
911 kprobes_fetch_type_table);
912 if (ret) {
913 pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
914 goto error;
915 }
916 }
917
918 ret = register_trace_kprobe(tk);
919 if (ret)
920 goto error;
921 return 0;
922
923 error:
924 free_trace_kprobe(tk);
925 return ret;
926 }
927
static int release_all_trace_kprobes(void)
929 {
930 struct trace_kprobe *tk;
931 int ret = 0;
932
933 mutex_lock(&probe_lock);
934 /* Ensure no probe is in use. */
935 list_for_each_entry(tk, &probe_list, list)
936 if (trace_probe_is_enabled(&tk->tp)) {
937 ret = -EBUSY;
938 goto end;
939 }
940 /* TODO: Use batch unregistration */
941 while (!list_empty(&probe_list)) {
942 tk = list_entry(probe_list.next, struct trace_kprobe, list);
943 ret = unregister_trace_kprobe(tk);
944 if (ret)
945 goto end;
946 free_trace_kprobe(tk);
947 }
948
949 end:
950 mutex_unlock(&probe_lock);
951
952 return ret;
953 }
954
955 /* Probes listing interfaces */
static void *probes_seq_start(struct seq_file *m, loff_t *pos)
957 {
958 mutex_lock(&probe_lock);
959 return seq_list_start(&probe_list, *pos);
960 }
961
static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
963 {
964 return seq_list_next(v, &probe_list, pos);
965 }
966
static void probes_seq_stop(struct seq_file *m, void *v)
968 {
969 mutex_unlock(&probe_lock);
970 }
971
static int probes_seq_show(struct seq_file *m, void *v)
973 {
974 struct trace_kprobe *tk = v;
975 int i;
976
977 seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p');
978 if (trace_kprobe_is_return(tk) && tk->rp.maxactive)
979 seq_printf(m, "%d", tk->rp.maxactive);
980 seq_printf(m, ":%s/%s", tk->tp.call.class->system,
981 trace_event_name(&tk->tp.call));
982
983 if (!tk->symbol)
984 seq_printf(m, " 0x%p", tk->rp.kp.addr);
985 else if (tk->rp.kp.offset)
986 seq_printf(m, " %s+%u", trace_kprobe_symbol(tk),
987 tk->rp.kp.offset);
988 else
989 seq_printf(m, " %s", trace_kprobe_symbol(tk));
990
991 for (i = 0; i < tk->tp.nr_args; i++)
992 seq_printf(m, " %s=%s", tk->tp.args[i].name, tk->tp.args[i].comm);
993 seq_putc(m, '\n');
994
995 return 0;
996 }
997
998 static const struct seq_operations probes_seq_op = {
999 .start = probes_seq_start,
1000 .next = probes_seq_next,
1001 .stop = probes_seq_stop,
1002 .show = probes_seq_show
1003 };
1004
static int probes_open(struct inode *inode, struct file *file)
1006 {
1007 int ret;
1008
1009 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
1010 ret = release_all_trace_kprobes();
1011 if (ret < 0)
1012 return ret;
1013 }
1014
1015 return seq_open(file, &probes_seq_op);
1016 }
1017
static ssize_t probes_write(struct file *file, const char __user *buffer,
1019 size_t count, loff_t *ppos)
1020 {
1021 return trace_parse_run_command(file, buffer, count, ppos,
1022 create_trace_kprobe);
1023 }
1024
1025 static const struct file_operations kprobe_events_ops = {
1026 .owner = THIS_MODULE,
1027 .open = probes_open,
1028 .read = seq_read,
1029 .llseek = seq_lseek,
1030 .release = seq_release,
1031 .write = probes_write,
1032 };
1033
1034 /* Probes profiling interfaces */
static int probes_profile_seq_show(struct seq_file *m, void *v)
1036 {
1037 struct trace_kprobe *tk = v;
1038
1039 seq_printf(m, " %-44s %15lu %15lu\n",
1040 trace_event_name(&tk->tp.call),
1041 trace_kprobe_nhit(tk),
1042 tk->rp.kp.nmissed);
1043
1044 return 0;
1045 }
1046
1047 static const struct seq_operations profile_seq_op = {
1048 .start = probes_seq_start,
1049 .next = probes_seq_next,
1050 .stop = probes_seq_stop,
1051 .show = probes_profile_seq_show
1052 };
1053
static int profile_open(struct inode *inode, struct file *file)
1055 {
1056 return seq_open(file, &profile_seq_op);
1057 }
1058
1059 static const struct file_operations kprobe_profile_ops = {
1060 .owner = THIS_MODULE,
1061 .open = profile_open,
1062 .read = seq_read,
1063 .llseek = seq_lseek,
1064 .release = seq_release,
1065 };
1066
1067 /* Kprobe handler */
1068 static nokprobe_inline void
__kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
1070 struct trace_event_file *trace_file)
1071 {
1072 struct kprobe_trace_entry_head *entry;
1073 struct ring_buffer_event *event;
1074 struct ring_buffer *buffer;
1075 int size, dsize, pc;
1076 unsigned long irq_flags;
1077 struct trace_event_call *call = &tk->tp.call;
1078
1079 WARN_ON(call != trace_file->event_call);
1080
1081 if (trace_trigger_soft_disabled(trace_file))
1082 return;
1083
1084 local_save_flags(irq_flags);
1085 pc = preempt_count();
1086
1087 dsize = __get_data_size(&tk->tp, regs);
1088 size = sizeof(*entry) + tk->tp.size + dsize;
1089
1090 event = trace_event_buffer_lock_reserve(&buffer, trace_file,
1091 call->event.type,
1092 size, irq_flags, pc);
1093 if (!event)
1094 return;
1095
1096 entry = ring_buffer_event_data(event);
1097 entry->ip = (unsigned long)tk->rp.kp.addr;
1098 store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
1099
1100 event_trigger_unlock_commit_regs(trace_file, buffer, event,
1101 entry, irq_flags, pc, regs);
1102 }
1103
1104 static void
kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)
1106 {
1107 struct event_file_link *link;
1108
1109 list_for_each_entry_rcu(link, &tk->tp.files, list)
1110 __kprobe_trace_func(tk, regs, link->file);
1111 }
1112 NOKPROBE_SYMBOL(kprobe_trace_func);
1113
1114 /* Kretprobe handler */
1115 static nokprobe_inline void
__kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
1117 struct pt_regs *regs,
1118 struct trace_event_file *trace_file)
1119 {
1120 struct kretprobe_trace_entry_head *entry;
1121 struct ring_buffer_event *event;
1122 struct ring_buffer *buffer;
1123 int size, pc, dsize;
1124 unsigned long irq_flags;
1125 struct trace_event_call *call = &tk->tp.call;
1126
1127 WARN_ON(call != trace_file->event_call);
1128
1129 if (trace_trigger_soft_disabled(trace_file))
1130 return;
1131
1132 local_save_flags(irq_flags);
1133 pc = preempt_count();
1134
1135 dsize = __get_data_size(&tk->tp, regs);
1136 size = sizeof(*entry) + tk->tp.size + dsize;
1137
1138 event = trace_event_buffer_lock_reserve(&buffer, trace_file,
1139 call->event.type,
1140 size, irq_flags, pc);
1141 if (!event)
1142 return;
1143
1144 entry = ring_buffer_event_data(event);
1145 entry->func = (unsigned long)tk->rp.kp.addr;
1146 entry->ret_ip = (unsigned long)ri->ret_addr;
1147 store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
1148
1149 event_trigger_unlock_commit_regs(trace_file, buffer, event,
1150 entry, irq_flags, pc, regs);
1151 }
1152
1153 static void
kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
1155 struct pt_regs *regs)
1156 {
1157 struct event_file_link *link;
1158
1159 list_for_each_entry_rcu(link, &tk->tp.files, list)
1160 __kretprobe_trace_func(tk, ri, regs, link->file);
1161 }
1162 NOKPROBE_SYMBOL(kretprobe_trace_func);
1163
1164 /* Event entry printers */
1165 static enum print_line_t
print_kprobe_event(struct trace_iterator *iter, int flags,
1167 struct trace_event *event)
1168 {
1169 struct kprobe_trace_entry_head *field;
1170 struct trace_seq *s = &iter->seq;
1171 struct trace_probe *tp;
1172 u8 *data;
1173 int i;
1174
1175 field = (struct kprobe_trace_entry_head *)iter->ent;
1176 tp = container_of(event, struct trace_probe, call.event);
1177
1178 trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));
1179
1180 if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
1181 goto out;
1182
1183 trace_seq_putc(s, ')');
1184
1185 data = (u8 *)&field[1];
1186 for (i = 0; i < tp->nr_args; i++)
1187 if (!tp->args[i].type->print(s, tp->args[i].name,
1188 data + tp->args[i].offset, field))
1189 goto out;
1190
1191 trace_seq_putc(s, '\n');
1192 out:
1193 return trace_handle_return(s);
1194 }
1195
1196 static enum print_line_t
print_kretprobe_event(struct trace_iterator *iter, int flags,
1198 struct trace_event *event)
1199 {
1200 struct kretprobe_trace_entry_head *field;
1201 struct trace_seq *s = &iter->seq;
1202 struct trace_probe *tp;
1203 u8 *data;
1204 int i;
1205
1206 field = (struct kretprobe_trace_entry_head *)iter->ent;
1207 tp = container_of(event, struct trace_probe, call.event);
1208
1209 trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));
1210
1211 if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
1212 goto out;
1213
1214 trace_seq_puts(s, " <- ");
1215
1216 if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
1217 goto out;
1218
1219 trace_seq_putc(s, ')');
1220
1221 data = (u8 *)&field[1];
1222 for (i = 0; i < tp->nr_args; i++)
1223 if (!tp->args[i].type->print(s, tp->args[i].name,
1224 data + tp->args[i].offset, field))
1225 goto out;
1226
1227 trace_seq_putc(s, '\n');
1228
1229 out:
1230 return trace_handle_return(s);
1231 }
1232
1233
static int kprobe_event_define_fields(struct trace_event_call *event_call)
1235 {
1236 int ret, i;
1237 struct kprobe_trace_entry_head field;
1238 struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;
1239
1240 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1241 /* Set argument names as fields */
1242 for (i = 0; i < tk->tp.nr_args; i++) {
1243 struct probe_arg *parg = &tk->tp.args[i];
1244
1245 ret = trace_define_field(event_call, parg->type->fmttype,
1246 parg->name,
1247 sizeof(field) + parg->offset,
1248 parg->type->size,
1249 parg->type->is_signed,
1250 FILTER_OTHER);
1251 if (ret)
1252 return ret;
1253 }
1254 return 0;
1255 }
1256
static int kretprobe_event_define_fields(struct trace_event_call *event_call)
1258 {
1259 int ret, i;
1260 struct kretprobe_trace_entry_head field;
1261 struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;
1262
1263 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1264 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1265 /* Set argument names as fields */
1266 for (i = 0; i < tk->tp.nr_args; i++) {
1267 struct probe_arg *parg = &tk->tp.args[i];
1268
1269 ret = trace_define_field(event_call, parg->type->fmttype,
1270 parg->name,
1271 sizeof(field) + parg->offset,
1272 parg->type->size,
1273 parg->type->is_signed,
1274 FILTER_OTHER);
1275 if (ret)
1276 return ret;
1277 }
1278 return 0;
1279 }
1280
1281 #ifdef CONFIG_PERF_EVENTS
1282
1283 /* Kprobe profile handler */
1284 static int
kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
1286 {
1287 struct trace_event_call *call = &tk->tp.call;
1288 struct kprobe_trace_entry_head *entry;
1289 struct hlist_head *head;
1290 int size, __size, dsize;
1291 int rctx;
1292
1293 if (bpf_prog_array_valid(call)) {
1294 unsigned long orig_ip = instruction_pointer(regs);
1295 int ret;
1296
1297 ret = trace_call_bpf(call, regs);
1298
1299 /*
1300 * We need to check and see if we modified the pc of the
1301 * pt_regs, and if so return 1 so that we don't do the
1302 * single stepping.
1303 */
1304 if (orig_ip != instruction_pointer(regs))
1305 return 1;
1306 if (!ret)
1307 return 0;
1308 }
1309
1310 head = this_cpu_ptr(call->perf_events);
1311 if (hlist_empty(head))
1312 return 0;
1313
1314 dsize = __get_data_size(&tk->tp, regs);
1315 __size = sizeof(*entry) + tk->tp.size + dsize;
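/*
 * Round the allocation up so that, together with the u32 size header that
 * perf prepends to raw samples, the record stays u64-aligned (the usual
 * sizing convention used with perf_trace_buf_alloc()).
 */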
1316 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1317 size -= sizeof(u32);
1318
1319 entry = perf_trace_buf_alloc(size, NULL, &rctx);
1320 if (!entry)
1321 return 0;
1322
1323 entry->ip = (unsigned long)tk->rp.kp.addr;
1324 memset(&entry[1], 0, dsize);
1325 store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
1326 perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
1327 head, NULL);
1328 return 0;
1329 }
1330 NOKPROBE_SYMBOL(kprobe_perf_func);
1331
1332 /* Kretprobe profile handler */
1333 static void
kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
1335 struct pt_regs *regs)
1336 {
1337 struct trace_event_call *call = &tk->tp.call;
1338 struct kretprobe_trace_entry_head *entry;
1339 struct hlist_head *head;
1340 int size, __size, dsize;
1341 int rctx;
1342
1343 if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
1344 return;
1345
1346 head = this_cpu_ptr(call->perf_events);
1347 if (hlist_empty(head))
1348 return;
1349
1350 dsize = __get_data_size(&tk->tp, regs);
1351 __size = sizeof(*entry) + tk->tp.size + dsize;
1352 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1353 size -= sizeof(u32);
1354
1355 entry = perf_trace_buf_alloc(size, NULL, &rctx);
1356 if (!entry)
1357 return;
1358
1359 entry->func = (unsigned long)tk->rp.kp.addr;
1360 entry->ret_ip = (unsigned long)ri->ret_addr;
1361 store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
1362 perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
1363 head, NULL);
1364 }
1365 NOKPROBE_SYMBOL(kretprobe_perf_func);
1366
int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
1368 const char **symbol, u64 *probe_offset,
1369 u64 *probe_addr, bool perf_type_tracepoint)
1370 {
1371 const char *pevent = trace_event_name(event->tp_event);
1372 const char *group = event->tp_event->class->system;
1373 struct trace_kprobe *tk;
1374
1375 if (perf_type_tracepoint)
1376 tk = find_trace_kprobe(pevent, group);
1377 else
1378 tk = event->tp_event->data;
1379 if (!tk)
1380 return -EINVAL;
1381
1382 *fd_type = trace_kprobe_is_return(tk) ? BPF_FD_TYPE_KRETPROBE
1383 : BPF_FD_TYPE_KPROBE;
1384 if (tk->symbol) {
1385 *symbol = tk->symbol;
1386 *probe_offset = tk->rp.kp.offset;
1387 *probe_addr = 0;
1388 } else {
1389 *symbol = NULL;
1390 *probe_offset = 0;
1391 *probe_addr = (unsigned long)tk->rp.kp.addr;
1392 }
1393 return 0;
1394 }
1395 #endif /* CONFIG_PERF_EVENTS */
1396
1397 /*
1398 * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
1399 *
1400 * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
1401 * lockless, but we can't race with this __init function.
1402 */
static int kprobe_register(struct trace_event_call *event,
1404 enum trace_reg type, void *data)
1405 {
1406 struct trace_kprobe *tk = (struct trace_kprobe *)event->data;
1407 struct trace_event_file *file = data;
1408
1409 switch (type) {
1410 case TRACE_REG_REGISTER:
1411 return enable_trace_kprobe(tk, file);
1412 case TRACE_REG_UNREGISTER:
1413 return disable_trace_kprobe(tk, file);
1414
1415 #ifdef CONFIG_PERF_EVENTS
1416 case TRACE_REG_PERF_REGISTER:
1417 return enable_trace_kprobe(tk, NULL);
1418 case TRACE_REG_PERF_UNREGISTER:
1419 return disable_trace_kprobe(tk, NULL);
1420 case TRACE_REG_PERF_OPEN:
1421 case TRACE_REG_PERF_CLOSE:
1422 case TRACE_REG_PERF_ADD:
1423 case TRACE_REG_PERF_DEL:
1424 return 0;
1425 #endif
1426 }
1427 return 0;
1428 }
1429
static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1431 {
1432 struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
1433 int ret = 0;
1434
1435 raw_cpu_inc(*tk->nhit);
1436
1437 if (tk->tp.flags & TP_FLAG_TRACE)
1438 kprobe_trace_func(tk, regs);
1439 #ifdef CONFIG_PERF_EVENTS
1440 if (tk->tp.flags & TP_FLAG_PROFILE)
1441 ret = kprobe_perf_func(tk, regs);
1442 #endif
1443 return ret;
1444 }
1445 NOKPROBE_SYMBOL(kprobe_dispatcher);
1446
1447 static int
kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1449 {
1450 struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
1451
1452 raw_cpu_inc(*tk->nhit);
1453
1454 if (tk->tp.flags & TP_FLAG_TRACE)
1455 kretprobe_trace_func(tk, ri, regs);
1456 #ifdef CONFIG_PERF_EVENTS
1457 if (tk->tp.flags & TP_FLAG_PROFILE)
1458 kretprobe_perf_func(tk, ri, regs);
1459 #endif
return 0; /* We don't tweak the kernel, so just return 0 */
1461 }
1462 NOKPROBE_SYMBOL(kretprobe_dispatcher);
1463
1464 static struct trace_event_functions kretprobe_funcs = {
1465 .trace = print_kretprobe_event
1466 };
1467
1468 static struct trace_event_functions kprobe_funcs = {
1469 .trace = print_kprobe_event
1470 };
1471
static inline void init_trace_event_call(struct trace_kprobe *tk,
1473 struct trace_event_call *call)
1474 {
1475 INIT_LIST_HEAD(&call->class->fields);
1476 if (trace_kprobe_is_return(tk)) {
1477 call->event.funcs = &kretprobe_funcs;
1478 call->class->define_fields = kretprobe_event_define_fields;
1479 } else {
1480 call->event.funcs = &kprobe_funcs;
1481 call->class->define_fields = kprobe_event_define_fields;
1482 }
1483
1484 call->flags = TRACE_EVENT_FL_KPROBE;
1485 call->class->reg = kprobe_register;
1486 call->data = tk;
1487 }
1488
static int register_kprobe_event(struct trace_kprobe *tk)
1490 {
1491 struct trace_event_call *call = &tk->tp.call;
1492 int ret = 0;
1493
1494 init_trace_event_call(tk, call);
1495
1496 if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
1497 return -ENOMEM;
1498 ret = register_trace_event(&call->event);
1499 if (!ret) {
1500 kfree(call->print_fmt);
1501 return -ENODEV;
1502 }
1503 ret = trace_add_event_call(call);
1504 if (ret) {
1505 pr_info("Failed to register kprobe event: %s\n",
1506 trace_event_name(call));
1507 kfree(call->print_fmt);
1508 unregister_trace_event(&call->event);
1509 }
1510 return ret;
1511 }
1512
static int unregister_kprobe_event(struct trace_kprobe *tk)
1514 {
1515 int ret;
1516
1517 /* tp->event is unregistered in trace_remove_event_call() */
1518 ret = trace_remove_event_call(&tk->tp.call);
1519 if (!ret)
1520 kfree(tk->tp.call.print_fmt);
1521 return ret;
1522 }
1523
1524 #ifdef CONFIG_PERF_EVENTS
1525 /* create a trace_kprobe, but don't add it to global lists */
1526 struct trace_event_call *
create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
1528 bool is_return)
1529 {
1530 struct trace_kprobe *tk;
1531 int ret;
1532 char *event;
1533
1534 /*
1535 * local trace_kprobes are not added to probe_list, so they are never
1536 * searched in find_trace_kprobe(). Therefore, there is no concern of
1537 * duplicated name here.
1538 */
1539 event = func ? func : "DUMMY_EVENT";
1540
1541 tk = alloc_trace_kprobe(KPROBE_EVENT_SYSTEM, event, (void *)addr, func,
1542 offs, 0 /* maxactive */, 0 /* nargs */,
1543 is_return);
1544
1545 if (IS_ERR(tk)) {
1546 pr_info("Failed to allocate trace_probe.(%d)\n",
1547 (int)PTR_ERR(tk));
1548 return ERR_CAST(tk);
1549 }
1550
1551 init_trace_event_call(tk, &tk->tp.call);
1552
1553 if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
1554 ret = -ENOMEM;
1555 goto error;
1556 }
1557
1558 ret = __register_trace_kprobe(tk);
1559 if (ret < 0) {
1560 kfree(tk->tp.call.print_fmt);
1561 goto error;
1562 }
1563
1564 return &tk->tp.call;
1565 error:
1566 free_trace_kprobe(tk);
1567 return ERR_PTR(ret);
1568 }
1569
void destroy_local_trace_kprobe(struct trace_event_call *event_call)
1571 {
1572 struct trace_kprobe *tk;
1573
1574 tk = container_of(event_call, struct trace_kprobe, tp.call);
1575
1576 if (trace_probe_is_enabled(&tk->tp)) {
1577 WARN_ON(1);
1578 return;
1579 }
1580
1581 __unregister_trace_kprobe(tk);
1582
1583 kfree(tk->tp.call.print_fmt);
1584 free_trace_kprobe(tk);
1585 }
1586 #endif /* CONFIG_PERF_EVENTS */
1587
1588 /* Make a tracefs interface for controlling probe points */
static __init int init_kprobe_trace(void)
1590 {
1591 struct dentry *d_tracer;
1592 struct dentry *entry;
1593
1594 if (register_module_notifier(&trace_kprobe_module_nb))
1595 return -EINVAL;
1596
1597 d_tracer = tracing_init_dentry();
1598 if (IS_ERR(d_tracer))
1599 return 0;
1600
1601 entry = tracefs_create_file("kprobe_events", 0644, d_tracer,
1602 NULL, &kprobe_events_ops);
1603
1604 /* Event list interface */
1605 if (!entry)
1606 pr_warn("Could not create tracefs 'kprobe_events' entry\n");
1607
1608 /* Profile interface */
1609 entry = tracefs_create_file("kprobe_profile", 0444, d_tracer,
1610 NULL, &kprobe_profile_ops);
1611
1612 if (!entry)
1613 pr_warn("Could not create tracefs 'kprobe_profile' entry\n");
1614 return 0;
1615 }
1616 fs_initcall(init_kprobe_trace);
1617
1618
1619 #ifdef CONFIG_FTRACE_STARTUP_TEST
1620 static __init struct trace_event_file *
find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr)
1622 {
1623 struct trace_event_file *file;
1624
1625 list_for_each_entry(file, &tr->events, list)
1626 if (file->event_call == &tk->tp.call)
1627 return file;
1628
1629 return NULL;
1630 }
1631
/*
 * Nobody but us can call enable_trace_kprobe/disable_trace_kprobe at this
 * stage, so we can do this locklessly.
 */
static __init int kprobe_trace_self_tests_init(void)
1637 {
1638 int ret, warn = 0;
1639 int (*target)(int, int, int, int, int, int);
1640 struct trace_kprobe *tk;
1641 struct trace_event_file *file;
1642
1643 if (tracing_is_disabled())
1644 return -ENODEV;
1645
1646 target = kprobe_trace_selftest_target;
1647
1648 pr_info("Testing kprobe tracing: ");
1649
1650 ret = trace_run_command("p:testprobe kprobe_trace_selftest_target "
1651 "$stack $stack0 +0($stack)",
1652 create_trace_kprobe);
1653 if (WARN_ON_ONCE(ret)) {
1654 pr_warn("error on probing function entry.\n");
1655 warn++;
1656 } else {
1657 /* Enable trace point */
1658 tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
1659 if (WARN_ON_ONCE(tk == NULL)) {
1660 pr_warn("error on getting new probe.\n");
1661 warn++;
1662 } else {
1663 file = find_trace_probe_file(tk, top_trace_array());
1664 if (WARN_ON_ONCE(file == NULL)) {
1665 pr_warn("error on getting probe file.\n");
1666 warn++;
1667 } else
1668 enable_trace_kprobe(tk, file);
1669 }
1670 }
1671
1672 ret = trace_run_command("r:testprobe2 kprobe_trace_selftest_target "
1673 "$retval", create_trace_kprobe);
1674 if (WARN_ON_ONCE(ret)) {
1675 pr_warn("error on probing function return.\n");
1676 warn++;
1677 } else {
1678 /* Enable trace point */
1679 tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
1680 if (WARN_ON_ONCE(tk == NULL)) {
1681 pr_warn("error on getting 2nd new probe.\n");
1682 warn++;
1683 } else {
1684 file = find_trace_probe_file(tk, top_trace_array());
1685 if (WARN_ON_ONCE(file == NULL)) {
1686 pr_warn("error on getting probe file.\n");
1687 warn++;
1688 } else
1689 enable_trace_kprobe(tk, file);
1690 }
1691 }
1692
1693 if (warn)
1694 goto end;
1695
1696 ret = target(1, 2, 3, 4, 5, 6);
1697
1698 /*
1699 * Not expecting an error here, the check is only to prevent the
1700 * optimizer from removing the call to target() as otherwise there
1701 * are no side-effects and the call is never performed.
1702 */
1703 if (ret != 21)
1704 warn++;
1705
1706 /* Disable trace points before removing it */
1707 tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
1708 if (WARN_ON_ONCE(tk == NULL)) {
1709 pr_warn("error on getting test probe.\n");
1710 warn++;
1711 } else {
1712 if (trace_kprobe_nhit(tk) != 1) {
1713 pr_warn("incorrect number of testprobe hits\n");
1714 warn++;
1715 }
1716
1717 file = find_trace_probe_file(tk, top_trace_array());
1718 if (WARN_ON_ONCE(file == NULL)) {
1719 pr_warn("error on getting probe file.\n");
1720 warn++;
1721 } else
1722 disable_trace_kprobe(tk, file);
1723 }
1724
1725 tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
1726 if (WARN_ON_ONCE(tk == NULL)) {
1727 pr_warn("error on getting 2nd test probe.\n");
1728 warn++;
1729 } else {
1730 if (trace_kprobe_nhit(tk) != 1) {
1731 pr_warn("incorrect number of testprobe2 hits\n");
1732 warn++;
1733 }
1734
1735 file = find_trace_probe_file(tk, top_trace_array());
1736 if (WARN_ON_ONCE(file == NULL)) {
1737 pr_warn("error on getting probe file.\n");
1738 warn++;
1739 } else
1740 disable_trace_kprobe(tk, file);
1741 }
1742
1743 ret = trace_run_command("-:testprobe", create_trace_kprobe);
1744 if (WARN_ON_ONCE(ret)) {
1745 pr_warn("error on deleting a probe.\n");
1746 warn++;
1747 }
1748
1749 ret = trace_run_command("-:testprobe2", create_trace_kprobe);
1750 if (WARN_ON_ONCE(ret)) {
1751 pr_warn("error on deleting a probe.\n");
1752 warn++;
1753 }
1754
1755 end:
1756 release_all_trace_kprobes();
1757 /*
1758 * Wait for the optimizer work to finish. Otherwise it might fiddle
1759 * with probes in already freed __init text.
1760 */
1761 wait_for_kprobe_optimizer();
1762 if (warn)
1763 pr_cont("NG: Some tests are failed. Please check them.\n");
1764 else
1765 pr_cont("OK\n");
1766 return 0;
1767 }
1768
1769 late_initcall(kprobe_trace_self_tests_init);
1770
1771 #endif
1772