• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 #ifndef _LINUX_TRACEPOINT_H
3 #define _LINUX_TRACEPOINT_H
4 
5 /*
6  * Kernel Tracepoint API.
7  *
8  * See Documentation/trace/tracepoints.rst.
9  *
10  * Copyright (C) 2008-2014 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
11  *
12  * Heavily inspired by the Linux Kernel Markers.
13  */
14 
15 #include <linux/smp.h>
16 #include <linux/srcu.h>
17 #include <linux/errno.h>
18 #include <linux/types.h>
19 #include <linux/cpumask.h>
20 #include <linux/rcupdate.h>
21 #include <linux/tracepoint-defs.h>
22 #include <linux/static_call.h>
23 
24 struct module;
25 struct tracepoint;
26 struct notifier_block;
27 
/*
 * One name/value pair: a string naming a symbol and the numeric value
 * it stands for.
 *
 * NOTE(review): presumably emitted by TRACE_DEFINE_ENUM()/TRACE_DEFINE_SIZEOF()
 * in the event-generating headers; this header only defines the layout.
 */
struct trace_eval_map {
	const char		*system;	/* presumably the owning trace system */
	const char		*eval_string;	/* textual name of the symbol */
	unsigned long		eval_value;	/* numeric value for eval_string */
};
33 
/* Priority passed on behalf of callers that do not pick one themselves. */
#define TRACEPOINT_DEFAULT_PRIO	10

/*
 * SRCU domain shared by the tracepoint code in this header: rcuidle call
 * sites take a read lock on it and tracepoint_synchronize_unregister()
 * waits for it.
 */
extern struct srcu_struct tracepoint_srcu;

/*
 * Probe (un)registration entry points, implemented outside this header.
 *
 * @tp:    tracepoint to attach to / detach from
 * @probe: callback, passed as an untyped pointer
 * @data:  private pointer associated with @probe
 * @prio:  priority for the _prio variants
 *
 * All return int. NOTE(review): presumably 0 on success and a negative
 * errno on failure -- confirm against the implementation.
 */
extern int
tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data);
extern int
tracepoint_probe_register_prio(struct tracepoint *tp, void *probe, void *data,
			       int prio);
extern int
tracepoint_probe_register_prio_may_exist(struct tracepoint *tp, void *probe, void *data,
					 int prio);
extern int
tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data);
48 static inline int
tracepoint_probe_register_may_exist(struct tracepoint * tp,void * probe,void * data)49 tracepoint_probe_register_may_exist(struct tracepoint *tp, void *probe,
50 				    void *data)
51 {
52 	return tracepoint_probe_register_prio_may_exist(tp, probe, data,
53 							TRACEPOINT_DEFAULT_PRIO);
54 }
/*
 * Call @fct with a tracepoint pointer and the caller's @priv cookie.
 * NOTE(review): by its name this visits every tracepoint built into the
 * kernel image -- confirm against the implementation, which is not in
 * this header.
 */
extern void
for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
		void *priv);
58 
#ifdef CONFIG_MODULES
/* List node pairing a module with its position on a tp_module list. */
struct tp_module {
	struct list_head list;
	struct module *mod;
};

bool trace_module_has_bad_taint(struct module *mod);
extern int register_tracepoint_module_notifier(struct notifier_block *nb);
extern int unregister_tracepoint_module_notifier(struct notifier_block *nb);
#else
/* Without module support no module can be tainted: always false. */
static inline bool trace_module_has_bad_taint(struct module *mod)
{
	return false;
}

/* Without module support there is nothing to notify about: succeed. */
static inline
int register_tracepoint_module_notifier(struct notifier_block *nb)
{
	return 0;
}

/* Counterpart of the stub above: nothing was registered, so succeed. */
static inline
int unregister_tracepoint_module_notifier(struct notifier_block *nb)
{
	return 0;
}
#endif /* CONFIG_MODULES */
84 
85 /*
86  * tracepoint_synchronize_unregister must be called between the last tracepoint
87  * probe unregistration and the end of module exit to make sure there is no
88  * caller executing a probe when it is freed.
89  */
#ifdef CONFIG_TRACEPOINTS
/*
 * Wait for both read-side flavours used by tracepoint call sites:
 * the tracepoint SRCU domain first, then regular RCU.
 */
static inline void tracepoint_synchronize_unregister(void)
{
	synchronize_srcu(&tracepoint_srcu);
	synchronize_rcu();
}
#else
/* No tracepoints configured: nothing to wait for. */
static inline void tracepoint_synchronize_unregister(void)
{ }
#endif
100 
#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
/* Declared only when syscall tracepoints are available; defined elsewhere. */
extern int syscall_regfunc(void);
extern void syscall_unregfunc(void);
#endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */

/*
 * PARAMS() expands to its arguments unchanged. It lets a comma-separated
 * list travel through another macro as a single argument.
 */
#ifndef PARAMS
#define PARAMS(args...) args
#endif

/* In this header both macros expand to nothing. */
#define TRACE_DEFINE_ENUM(x)
#define TRACE_DEFINE_SIZEOF(x)
112 
113 #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
tracepoint_ptr_deref(tracepoint_ptr_t * p)114 static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
115 {
116 	return offset_to_ptr(p);
117 }
118 
119 #define __TRACEPOINT_ENTRY(name)					\
120 	asm("	.section \"__tracepoints_ptrs\", \"a\"		\n"	\
121 	    "	.balign 4					\n"	\
122 	    "	.long 	__tracepoint_" #name " - .		\n"	\
123 	    "	.previous					\n")
124 #else
tracepoint_ptr_deref(tracepoint_ptr_t * p)125 static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
126 {
127 	return *p;
128 }
129 
130 #define __TRACEPOINT_ENTRY(name)					 \
131 	static tracepoint_ptr_t __tracepoint_ptr_##name __used		 \
132 	__section("__tracepoints_ptrs") = &__tracepoint_##name
133 #endif
134 
135 #endif /* _LINUX_TRACEPOINT_H */
136 
137 /*
138  * Note: we keep the TRACE_EVENT and DECLARE_TRACE outside the include
139  *  file ifdef protection.
140  *  This is due to the way trace events work. If a file includes two
141  *  trace event headers under one "CREATE_TRACE_POINTS" the first include
142  *  will override the TRACE_EVENT and break the second include.
143  */
144 
145 #ifndef DECLARE_TRACE
146 
/*
 * Pass-through wrappers: each expands to its arguments unchanged. They
 * label the prototype, argument and condition lists at the use site.
 */
#define TP_PROTO(args...)	args
#define TP_ARGS(args...)	args
#define TP_CONDITION(args...)	args

/*
 * Individual subsystems may have a separate configuration to
 * enable their tracepoints. By default, this file will create
 * the tracepoints if CONFIG_TRACEPOINTS is defined. If a subsystem
 * wants to be able to disable its tracepoints from being created
 * it can define NOTRACE before including the tracepoint headers.
 */
#if defined(CONFIG_TRACEPOINTS) && !defined(NOTRACE)
#define TRACEPOINTS_ENABLED
#endif
161 
162 #ifdef TRACEPOINTS_ENABLED
163 
/*
 * __DO_TRACE_CALL(name, args) - invoke the probes of tracepoint @name.
 *
 * With static calls: load the probe array; if it is non-NULL, take the
 * data pointer of its first entry and invoke static_call(tp_func_<name>)
 * with that pointer followed by @args.
 *
 * Without static calls: call __traceiter_<name>() directly with a NULL
 * data pointer followed by @args.
 */
#ifdef CONFIG_HAVE_STATIC_CALL
#define __DO_TRACE_CALL(name, args)					\
	do {								\
		struct tracepoint_func *it_func_ptr;			\
		void *__data;						\
		it_func_ptr =						\
			rcu_dereference_raw((&__tracepoint_##name)->funcs); \
		if (it_func_ptr) {					\
			__data = (it_func_ptr)->data;			\
			static_call(tp_func_##name)(__data, args);	\
		}							\
	} while (0)
#else
#define __DO_TRACE_CALL(name, args)	__traceiter_##name(NULL, args)
#endif /* CONFIG_HAVE_STATIC_CALL */
179 
/*
 * it_func[0] is never NULL because there is at least one element in the array
 * when the array itself is non NULL.
 *
 * __DO_TRACE(name, args, cond, rcuidle) - guarded probe invocation.
 *
 * Note that this expands to a bare "return;" when @cond is false, so it
 * can only be used inside a function returning void.
 *
 * Otherwise it disables preemption, calls the probes through
 * __DO_TRACE_CALL() and re-enables preemption. When @rcuidle is non-zero
 * the call is additionally bracketed by an SRCU read lock on
 * tracepoint_srcu and rcu_irq_enter_irqson()/rcu_irq_exit_irqson().
 * Using the rcuidle flavour from NMI context triggers a one-time warning.
 */
#define __DO_TRACE(name, args, cond, rcuidle)				\
	do {								\
		int __maybe_unused __idx = 0;				\
									\
		if (!(cond))						\
			return;						\
									\
		/* srcu can't be used from NMI */			\
		WARN_ON_ONCE(rcuidle && in_nmi());			\
									\
		/* keep srcu and sched-rcu usage consistent */		\
		preempt_disable_notrace();				\
									\
		/*							\
		 * For rcuidle callers, use srcu since sched-rcu	\
		 * doesn't work from the idle path.			\
		 */							\
		if (rcuidle) {						\
			__idx = srcu_read_lock_notrace(&tracepoint_srcu);\
			rcu_irq_enter_irqson();				\
		}							\
									\
		__DO_TRACE_CALL(name, TP_ARGS(args));			\
									\
		if (rcuidle) {						\
			rcu_irq_exit_irqson();				\
			srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
		}							\
									\
		preempt_enable_notrace();				\
	} while (0)
215 
/*
 * __DECLARE_TRACE_RCU() - generate trace_<name>_rcuidle().
 *
 * When not building a module, the generated function checks the
 * tracepoint's static key and, if set, runs __DO_TRACE() with the
 * rcuidle flag set to 1. When MODULE is defined the macro expands to
 * nothing, so no _rcuidle variant is generated for module code.
 */
#ifndef MODULE
#define __DECLARE_TRACE_RCU(name, proto, args, cond)			\
	static inline void trace_##name##_rcuidle(proto)		\
	{								\
		if (static_key_false(&__tracepoint_##name.key))		\
			__DO_TRACE(name,				\
				TP_ARGS(args),				\
				TP_CONDITION(cond), 1);			\
	}
#else
#define __DECLARE_TRACE_RCU(name, proto, args, cond)
#endif
228 
/*
 * Make sure the alignment of the structure in the __tracepoints section will
 * not add unwanted padding between the beginning of the section and the
 * structure. Force alignment to the same alignment as the section start.
 *
 * When lockdep is enabled, we make sure to always test if RCU is
 * "watching" regardless if the tracepoint is enabled or not. Tracepoints
 * require RCU to be active, and it should always warn at the tracepoint
 * site if it is not watching, as it will need to be active when the
 * tracepoint is enabled.
 *
 * __DECLARE_TRACE(name, proto, args, cond, data_proto) declares the
 * iterator, static call and struct tracepoint for @name, and generates:
 *
 *   trace_<name>(proto)              - the call site; runs the probes when
 *                                      the static key is set and @cond holds
 *   trace_<name>_rcuidle(proto)      - via __DECLARE_TRACE_RCU()
 *   register_trace_<name>()          - wraps tracepoint_probe_register()
 *   register_trace_prio_<name>()     - wraps tracepoint_probe_register_prio()
 *   unregister_trace_<name>()        - wraps tracepoint_probe_unregister()
 *   check_trace_callback_type_<name>() - empty body; only type-checks @cb
 *   trace_<name>_enabled()           - returns the state of the static key
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
	extern int __traceiter_##name(data_proto);			\
	DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name);	\
	extern struct tracepoint __tracepoint_##name;			\
	static inline void __nocfi trace_##name(proto)				\
	{								\
		if (static_key_false(&__tracepoint_##name.key))		\
			__DO_TRACE(name,				\
				TP_ARGS(args),				\
				TP_CONDITION(cond), 0);			\
		if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {		\
			WARN_ON_ONCE(!rcu_is_watching());		\
		}							\
	}								\
	__DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args),		\
			    PARAMS(cond))				\
	static inline int						\
	register_trace_##name(void (*probe)(data_proto), void *data)	\
	{								\
		return tracepoint_probe_register(&__tracepoint_##name,	\
						(void *)probe, data);	\
	}								\
	static inline int						\
	register_trace_prio_##name(void (*probe)(data_proto), void *data,\
				   int prio)				\
	{								\
		return tracepoint_probe_register_prio(&__tracepoint_##name, \
					      (void *)probe, data, prio); \
	}								\
	static inline int						\
	unregister_trace_##name(void (*probe)(data_proto), void *data)	\
	{								\
		return tracepoint_probe_unregister(&__tracepoint_##name,\
						(void *)probe, data);	\
	}								\
	static inline void						\
	check_trace_callback_type_##name(void (*cb)(data_proto))	\
	{								\
	}								\
	static inline bool						\
	trace_##name##_enabled(void)					\
	{								\
		return static_key_false(&__tracepoint_##name.key);	\
	}
284 
/*
 * We have no guarantee that gcc and the linker won't up-align the tracepoint
 * structures, so we create an array of pointers that will be used for iteration
 * on the tracepoints.
 *
 * DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args) emits, for one
 * tracepoint:
 *   - its name string in the "__tracepoints_strings" section;
 *   - the struct tracepoint itself in "__tracepoints", with @_reg and
 *     @_unreg stored as .regfunc/.unregfunc and no probes (.funcs = NULL);
 *   - its entry in the pointer array via __TRACEPOINT_ENTRY();
 *   - __traceiter_<_name>(), which walks the probe array until it reaches
 *     an entry whose func is NULL, calling each probe with that probe's
 *     own data pointer followed by @args, and returns 0;
 *   - the static call tp_func_<_name>, initially targeting the iterator.
 */
#define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args)		\
	static const char __tpstrtab_##_name[]				\
	__section("__tracepoints_strings") = #_name;			\
	extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name);	\
	int __traceiter_##_name(void *__data, proto);			\
	struct tracepoint __tracepoint_##_name	__used			\
	__section("__tracepoints") = {					\
		.name = __tpstrtab_##_name,				\
		.key = STATIC_KEY_INIT_FALSE,				\
		.static_call_key = &STATIC_CALL_KEY(tp_func_##_name),	\
		.static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \
		.iterator = &__traceiter_##_name,			\
		.regfunc = _reg,					\
		.unregfunc = _unreg,					\
		.funcs = NULL };					\
	__TRACEPOINT_ENTRY(_name);					\
	int __nocfi __traceiter_##_name(void *__data, proto)			\
	{								\
		struct tracepoint_func *it_func_ptr;			\
		void *it_func;						\
									\
		it_func_ptr =						\
			rcu_dereference_raw((&__tracepoint_##_name)->funcs); \
		if (it_func_ptr) {					\
			do {						\
				it_func = READ_ONCE((it_func_ptr)->func); \
				__data = (it_func_ptr)->data;		\
				((void(*)(void *, proto))(it_func))(__data, args); \
			} while ((++it_func_ptr)->func);		\
		}							\
		return 0;						\
	}								\
	DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name);
323 
/* DEFINE_TRACE_FN() with no register/unregister callbacks. */
#define DEFINE_TRACE(name, proto, args)		\
	DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));

/*
 * Export everything a user of tracepoint @name links against: the
 * struct tracepoint, the iterator function and the static call.
 * The _GPL variant uses the GPL-only export macros.
 */
#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)				\
	EXPORT_SYMBOL_GPL(__tracepoint_##name);				\
	EXPORT_SYMBOL_GPL(__traceiter_##name);				\
	EXPORT_STATIC_CALL_GPL(tp_func_##name)
#define EXPORT_TRACEPOINT_SYMBOL(name)					\
	EXPORT_SYMBOL(__tracepoint_##name);				\
	EXPORT_SYMBOL(__traceiter_##name);				\
	EXPORT_STATIC_CALL(tp_func_##name)
335 
336 
337 #else /* !TRACEPOINTS_ENABLED */
/*
 * Tracepoints disabled: generate empty call sites and stubs so that users
 * still compile. The trace_<name>() and trace_<name>_rcuidle() bodies are
 * empty, the (un)registration helpers fail with -ENOSYS and
 * trace_<name>_enabled() is always false.
 *
 * register_trace_prio_<name>() is stubbed as well, so that callers of the
 * priority-aware helper generated by the enabled variant also build when
 * tracepoints are compiled out.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
	static inline void trace_##name(proto)				\
	{ }								\
	static inline void trace_##name##_rcuidle(proto)		\
	{ }								\
	static inline int						\
	register_trace_##name(void (*probe)(data_proto),		\
			      void *data)				\
	{								\
		return -ENOSYS;						\
	}								\
	static inline int						\
	register_trace_prio_##name(void (*probe)(data_proto),		\
				   void *data, int prio)		\
	{								\
		return -ENOSYS;						\
	}								\
	static inline int						\
	unregister_trace_##name(void (*probe)(data_proto),		\
				void *data)				\
	{								\
		return -ENOSYS;						\
	}								\
	static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \
	{								\
	}								\
	static inline bool						\
	trace_##name##_enabled(void)					\
	{								\
		return false;						\
	}

/* With tracepoints disabled, definitions and exports expand to nothing. */
#define DEFINE_TRACE_FN(name, reg, unreg, proto, args)
#define DEFINE_TRACE(name, proto, args)
#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)
#define EXPORT_TRACEPOINT_SYMBOL(name)
368 
369 #endif /* TRACEPOINTS_ENABLED */
370 
#ifdef CONFIG_TRACING
/**
 * tracepoint_string - register constant persistent string to trace system
 * @str: a constant persistent string that will be referenced in tracepoints
 *
 * If constant strings are being used in tracepoints, it is faster and
 * more efficient to just save the pointer to the string and reference
 * that with a printf "%s" instead of saving the string in the ring buffer
 * and wasting space and time.
 *
 * The problem with the above approach is that userspace tools that read
 * the binary output of the trace buffers do not have access to the string.
 * Instead they just show the address of the string which is not very
 * useful to users.
 *
 * With tracepoint_string(), the string will be registered to the tracing
 * system and exported to userspace via the debugfs/tracing/printk_formats
 * file that maps the string address to the string text. This way userspace
 * tools that read the binary buffers have a way to map the pointers to
 * the ASCII strings they represent.
 *
 * The @str used must be a constant string and persistent as it would not
 * make sense to show a string that no longer exists. But it is still fine
 * to be used with modules, because when modules are unloaded, if they
 * had tracepoints, the ring buffers are cleared too. As long as the string
 * does not change during the life of the module, it is fine to use
 * tracepoint_string() within a module.
 *
 * Evaluates to a pointer to @str; the pointer variable itself is placed
 * in the "__tracepoint_str" section.
 */
#define tracepoint_string(str)						\
	({								\
		static const char *___tp_str __tracepoint_string = str; \
		___tp_str;						\
	})
#define __tracepoint_string	__used __section("__tracepoint_str")
#else
/*
 * tracepoint_string() is used to save the string address for userspace
 * tracing tools. When tracing isn't configured, there's no need to save
 * anything.
 */
# define tracepoint_string(str) str
# define __tracepoint_string
#endif
414 
/*
 * DECLARE_TRACE(name, proto, args) - declare tracepoint @name.
 *
 * The implicit condition is that the current CPU is online. Probes
 * receive a leading "void *__data" argument ahead of @proto.
 */
#define DECLARE_TRACE(name, proto, args)				\
	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
			cpu_online(raw_smp_processor_id()),		\
			PARAMS(void *__data, proto))

/*
 * Same as DECLARE_TRACE(), but the tracepoint additionally requires
 * @cond to hold on top of the CPU-online check.
 */
#define DECLARE_TRACE_CONDITION(name, proto, args, cond)		\
	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
			cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \
			PARAMS(void *__data, proto))

/* Both expand to nothing in this header. */
#define TRACE_EVENT_FLAGS(event, flag)

#define TRACE_EVENT_PERF_PERM(event, expr...)
428 
429 #endif /* DECLARE_TRACE */
430 
431 #ifndef TRACE_EVENT
432 /*
433  * For use with the TRACE_EVENT macro:
434  *
435  * We define a tracepoint, its arguments, its printk format
436  * and its 'fast binary record' layout.
437  *
438  * Firstly, name your tracepoint via TRACE_EVENT(name, ...); the
439  * 'subsystem_event' notation is fine.
440  *
441  * Think about this whole construct as the
442  * 'trace_sched_switch() function' from now on.
443  *
444  *
445  *  TRACE_EVENT(sched_switch,
446  *
447  *	*
448  *	* A function has a regular function argument
449  *	* prototype, declare it via TP_PROTO():
450  *	*
451  *
452  *	TP_PROTO(struct rq *rq, struct task_struct *prev,
453  *		 struct task_struct *next),
454  *
455  *	*
456  *	* Define the call signature of the 'function'.
457  *	* (Design sidenote: we use this instead of a
458  *	*  TP_PROTO1/TP_PROTO2/TP_PROTO3 ugliness.)
459  *	*
460  *
461  *	TP_ARGS(rq, prev, next),
462  *
463  *	*
464  *	* Fast binary tracing: define the trace record via
465  *	* TP_STRUCT__entry(). You can think about it like a
466  *	* regular C structure local variable definition.
467  *	*
468  *	* This is how the trace record is structured and will
469  *	* be saved into the ring buffer. These are the fields
470  *	* that will be exposed to user-space in
471  *	* /sys/kernel/debug/tracing/events/<*>/format.
472  *	*
473  *	* The declared 'local variable' is called '__entry'
474  *	*
475  *	* __field(pid_t, prev_pid) is equivalent to a standard declaration:
476  *	*
477  *	*	pid_t	prev_pid;
478  *	*
479  *	* __array(char, prev_comm, TASK_COMM_LEN) is equivalent to:
480  *	*
481  *	*	char	prev_comm[TASK_COMM_LEN];
482  *	*
483  *
484  *	TP_STRUCT__entry(
485  *		__array(	char,	prev_comm,	TASK_COMM_LEN	)
486  *		__field(	pid_t,	prev_pid			)
487  *		__field(	int,	prev_prio			)
488  *		__array(	char,	next_comm,	TASK_COMM_LEN	)
489  *		__field(	pid_t,	next_pid			)
490  *		__field(	int,	next_prio			)
491  *	),
492  *
493  *	*
494  *	* Assign the entry into the trace record, by embedding
495  *	* a full C statement block into TP_fast_assign(). You
496  *	* can refer to the trace record as '__entry' -
497  *	* otherwise you can put arbitrary C code in here.
498  *	*
499  *	* Note: this C code will execute every time a trace event
500  *	* happens, on an active tracepoint.
501  *	*
502  *
503  *	TP_fast_assign(
504  *		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
505  *		__entry->prev_pid	= prev->pid;
506  *		__entry->prev_prio	= prev->prio;
507  *		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
508  *		__entry->next_pid	= next->pid;
509  *		__entry->next_prio	= next->prio;
510  *	),
511  *
512  *	*
513  *	* Formatted output of a trace record via TP_printk().
514  *	* This is how the tracepoint will appear under ftrace
515  *	* plugins that make use of this tracepoint.
516  *	*
517  *	* (raw-binary tracing won't actually perform this step.)
518  *	*
519  *
520  *	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
521  *		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
522  *		__entry->next_comm, __entry->next_pid, __entry->next_prio),
523  *
524  * );
525  *
526  * This macro construct is thus used for the regular printk format
527  * tracing setup, it is used to construct a function pointer based
528  * tracepoint callback (this is used by programmatic plugins and
529  * can also be used by generic instrumentation like SystemTap), and
530  * it is also used to expose a structured trace record in
531  * /sys/kernel/debug/tracing/events/.
532  *
533  * A set of (un)registration functions can be passed to the variant
534  * TRACE_EVENT_FN to perform any (un)registration work.
535  */
536 
/*
 * Default expansions of the TRACE_EVENT() family. In this header each
 * one only declares the tracepoint through DECLARE_TRACE() or
 * DECLARE_TRACE_CONDITION(); the struct/assign/print/reg/unreg arguments
 * are accepted but unused, and DECLARE_EVENT_CLASS() expands to nothing.
 */
#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)
#define DEFINE_EVENT(template, name, proto, args)		\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg)\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DEFINE_EVENT_CONDITION(template, name, proto,		\
			       args, cond)			\
	DECLARE_TRACE_CONDITION(name, PARAMS(proto),		\
				PARAMS(args), PARAMS(cond))

#define TRACE_EVENT(name, proto, args, struct, assign, print)	\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define TRACE_EVENT_FN(name, proto, args, struct,		\
		assign, print, reg, unreg)			\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define TRACE_EVENT_FN_COND(name, proto, args, cond, struct,		\
		assign, print, reg, unreg)			\
	DECLARE_TRACE_CONDITION(name, PARAMS(proto),	\
			PARAMS(args), PARAMS(cond))
#define TRACE_EVENT_CONDITION(name, proto, args, cond,		\
			      struct, assign, print)		\
	DECLARE_TRACE_CONDITION(name, PARAMS(proto),		\
				PARAMS(args), PARAMS(cond))

/* Both expand to nothing in this header. */
#define TRACE_EVENT_FLAGS(event, flag)

#define TRACE_EVENT_PERF_PERM(event, expr...)
566 
/*
 * No-op event declarations: trace_<name>() has an empty body and
 * trace_<name>_enabled() always returns false. No registration helpers
 * are generated.
 */
#define DECLARE_EVENT_NOP(name, proto, args)				\
	static inline void trace_##name(proto)				\
	{ }								\
	static inline bool trace_##name##_enabled(void)			\
	{								\
		return false;						\
	}

/* TRACE_EVENT()-shaped wrapper; struct/assign/print are unused. */
#define TRACE_EVENT_NOP(name, proto, args, struct, assign, print)	\
	DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))

/* Class declaration expands to nothing; each event becomes a no-op. */
#define DECLARE_EVENT_CLASS_NOP(name, proto, args, tstruct, assign, print)
#define DEFINE_EVENT_NOP(template, name, proto, args)			\
	DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))
581 
582 #endif /* ifndef TRACE_EVENT (see note above) */
583