1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/cleanup.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 #include <trace/hooks/ftrace_dump.h>
53 
54 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
55 
56 #include "trace.h"
57 #include "trace_output.h"
58 
59 #ifdef CONFIG_FTRACE_STARTUP_TEST
60 /*
61  * We need to change this state when a selftest is running.
62  * A selftest will look into the ring buffer to count the
63  * entries inserted during the selftest, although some concurrent
64  * insertions into the ring buffer, such as trace_printk, could occur
65  * at the same time, giving false positive or negative results.
66  */
67 static bool __read_mostly tracing_selftest_running;
68 
69 /*
70  * If boot-time tracing including tracers/events via kernel cmdline
71  * is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74 
75 void __init disable_tracing_selftest(const char *reason)
76 {
77 	if (!tracing_selftest_disabled) {
78 		tracing_selftest_disabled = true;
79 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
80 	}
81 }
82 #else
83 #define tracing_selftest_running	0
84 #define tracing_selftest_disabled	0
85 #endif
86 
87 /* Pipe tracepoints to printk */
88 static struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92 
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95 	{ }
96 };
97 
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101 	return 0;
102 }
103 
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118 
119 cpumask_var_t __read_mostly	tracing_buffer_mask;
120 
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops
133  * Set 1 if you want to dump buffers of all CPUs
134  * Set 2 if you want to dump the buffer of the CPU that triggered oops
135  * Set instance name if you want to dump the specific trace instance
136  * Multiple instance dump is also supported, and instances are separated
137  * by commas.
138  */
139 /* Set to the string "0" to disable by default */
140 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
141 
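/*
 * Illustrative examples of the settings described above (a sketch only;
 * "foo" and "bar" are hypothetical instance names and the accepted values
 * may vary between kernel versions):
 *
 *	ftrace_dump_on_oops			on the kernel command line
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops	dump all CPUs
 *	echo 2 > /proc/sys/kernel/ftrace_dump_on_oops	dump only the oopsing CPU
 *	ftrace_dump_on_oops=foo,bar		dump the "foo" and "bar" instances
 */
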
142 /* When set, tracing will stop when a WARN*() is hit */
143 int __disable_trace_on_warning;
144 
145 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
146 /* Map of enums to their values, for "eval_map" file */
147 struct trace_eval_map_head {
148 	struct module			*mod;
149 	unsigned long			length;
150 };
151 
152 union trace_eval_map_item;
153 
154 struct trace_eval_map_tail {
155 	/*
156 	 * "end" is first and points to NULL as it must be different
157 	 * from "mod" or "eval_string"
158 	 */
159 	union trace_eval_map_item	*next;
160 	const char			*end;	/* points to NULL */
161 };
162 
163 static DEFINE_MUTEX(trace_eval_mutex);
164 
165 /*
166  * The trace_eval_maps are saved in an array with two extra elements,
167  * one at the beginning, and one at the end. The beginning item contains
168  * the count of the saved maps (head.length), and the module they
169  * belong to if not built in (head.mod). The ending item contains a
170  * pointer to the next array of saved eval_map items.
171  */
172 union trace_eval_map_item {
173 	struct trace_eval_map		map;
174 	struct trace_eval_map_head	head;
175 	struct trace_eval_map_tail	tail;
176 };
177 
178 static union trace_eval_map_item *trace_eval_maps;
179 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
180 
181 int tracing_set_tracer(struct trace_array *tr, const char *buf);
182 static void ftrace_trace_userstack(struct trace_array *tr,
183 				   struct trace_buffer *buffer,
184 				   unsigned int trace_ctx);
185 
186 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
187 static char *default_bootup_tracer;
188 
189 static bool allocate_snapshot;
190 static bool snapshot_at_boot;
191 
192 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
193 static int boot_instance_index;
194 
195 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
196 static int boot_snapshot_index;
197 
198 static int __init set_cmdline_ftrace(char *str)
199 {
200 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
201 	default_bootup_tracer = bootup_tracer_buf;
202 	/* We are using ftrace early, expand it */
203 	trace_set_ring_buffer_expanded(NULL);
204 	return 1;
205 }
206 __setup("ftrace=", set_cmdline_ftrace);
207 
208 int ftrace_dump_on_oops_enabled(void)
209 {
210 	if (!strcmp("0", ftrace_dump_on_oops))
211 		return 0;
212 	else
213 		return 1;
214 }
215 
216 static int __init set_ftrace_dump_on_oops(char *str)
217 {
218 	if (!*str) {
219 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
220 		return 1;
221 	}
222 
223 	if (*str == ',') {
224 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
225 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
226 		return 1;
227 	}
228 
229 	if (*str++ == '=') {
230 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
231 		return 1;
232 	}
233 
234 	return 0;
235 }
236 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
237 
238 static int __init stop_trace_on_warning(char *str)
239 {
240 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
241 		__disable_trace_on_warning = 1;
242 	return 1;
243 }
244 __setup("traceoff_on_warning", stop_trace_on_warning);
245 
246 static int __init boot_alloc_snapshot(char *str)
247 {
248 	char *slot = boot_snapshot_info + boot_snapshot_index;
249 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
250 	int ret;
251 
252 	if (str[0] == '=') {
253 		str++;
254 		if (strlen(str) >= left)
255 			return -1;
256 
257 		ret = snprintf(slot, left, "%s\t", str);
258 		boot_snapshot_index += ret;
259 	} else {
260 		allocate_snapshot = true;
261 		/* We also need the main ring buffer expanded */
262 		trace_set_ring_buffer_expanded(NULL);
263 	}
264 	return 1;
265 }
266 __setup("alloc_snapshot", boot_alloc_snapshot);
267 
268 
269 static int __init boot_snapshot(char *str)
270 {
271 	snapshot_at_boot = true;
272 	boot_alloc_snapshot(str);
273 	return 1;
274 }
275 __setup("ftrace_boot_snapshot", boot_snapshot);
276 
277 
278 static int __init boot_instance(char *str)
279 {
280 	char *slot = boot_instance_info + boot_instance_index;
281 	int left = sizeof(boot_instance_info) - boot_instance_index;
282 	int ret;
283 
284 	if (strlen(str) >= left)
285 		return -1;
286 
287 	ret = snprintf(slot, left, "%s\t", str);
288 	boot_instance_index += ret;
289 
290 	return 1;
291 }
292 __setup("trace_instance=", boot_instance);
293 
294 
295 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
296 
297 static int __init set_trace_boot_options(char *str)
298 {
299 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
300 	return 1;
301 }
302 __setup("trace_options=", set_trace_boot_options);
303 
304 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
305 static char *trace_boot_clock __initdata;
306 
307 static int __init set_trace_boot_clock(char *str)
308 {
309 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
310 	trace_boot_clock = trace_boot_clock_buf;
311 	return 1;
312 }
313 __setup("trace_clock=", set_trace_boot_clock);
314 
315 static int __init set_tracepoint_printk(char *str)
316 {
317 	/* Ignore the "tp_printk_stop_on_boot" param */
318 	if (*str == '_')
319 		return 0;
320 
321 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
322 		tracepoint_printk = 1;
323 	return 1;
324 }
325 __setup("tp_printk", set_tracepoint_printk);
326 
327 static int __init set_tracepoint_printk_stop(char *str)
328 {
329 	tracepoint_printk_stop_on_boot = true;
330 	return 1;
331 }
332 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
333 
334 unsigned long long ns2usecs(u64 nsec)
335 {
336 	nsec += 500;
337 	do_div(nsec, 1000);
338 	return nsec;
339 }
340 
341 static void
342 trace_process_export(struct trace_export *export,
343 	       struct ring_buffer_event *event, int flag)
344 {
345 	struct trace_entry *entry;
346 	unsigned int size = 0;
347 
348 	if (export->flags & flag) {
349 		entry = ring_buffer_event_data(event);
350 		size = ring_buffer_event_length(event);
351 		export->write(export, entry, size);
352 	}
353 }
354 
355 static DEFINE_MUTEX(ftrace_export_lock);
356 
357 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
358 
359 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
360 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
361 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
362 
363 static inline void ftrace_exports_enable(struct trace_export *export)
364 {
365 	if (export->flags & TRACE_EXPORT_FUNCTION)
366 		static_branch_inc(&trace_function_exports_enabled);
367 
368 	if (export->flags & TRACE_EXPORT_EVENT)
369 		static_branch_inc(&trace_event_exports_enabled);
370 
371 	if (export->flags & TRACE_EXPORT_MARKER)
372 		static_branch_inc(&trace_marker_exports_enabled);
373 }
374 
375 static inline void ftrace_exports_disable(struct trace_export *export)
376 {
377 	if (export->flags & TRACE_EXPORT_FUNCTION)
378 		static_branch_dec(&trace_function_exports_enabled);
379 
380 	if (export->flags & TRACE_EXPORT_EVENT)
381 		static_branch_dec(&trace_event_exports_enabled);
382 
383 	if (export->flags & TRACE_EXPORT_MARKER)
384 		static_branch_dec(&trace_marker_exports_enabled);
385 }
386 
387 static void ftrace_exports(struct ring_buffer_event *event, int flag)
388 {
389 	struct trace_export *export;
390 
391 	preempt_disable_notrace();
392 
393 	export = rcu_dereference_raw_check(ftrace_exports_list);
394 	while (export) {
395 		trace_process_export(export, event, flag);
396 		export = rcu_dereference_raw_check(export->next);
397 	}
398 
399 	preempt_enable_notrace();
400 }
401 
402 static inline void
403 add_trace_export(struct trace_export **list, struct trace_export *export)
404 {
405 	rcu_assign_pointer(export->next, *list);
406 	/*
407 	 * We are adding the export to the list, but another
408 	 * CPU might be walking that list. We need to make sure
409 	 * the export->next pointer is valid before another CPU sees
410 	 * the export pointer included in the list.
411 	 */
412 	rcu_assign_pointer(*list, export);
413 }
414 
415 static inline int
416 rm_trace_export(struct trace_export **list, struct trace_export *export)
417 {
418 	struct trace_export **p;
419 
420 	for (p = list; *p != NULL; p = &(*p)->next)
421 		if (*p == export)
422 			break;
423 
424 	if (*p != export)
425 		return -1;
426 
427 	rcu_assign_pointer(*p, (*p)->next);
428 
429 	return 0;
430 }
431 
432 static inline void
433 add_ftrace_export(struct trace_export **list, struct trace_export *export)
434 {
435 	ftrace_exports_enable(export);
436 
437 	add_trace_export(list, export);
438 }
439 
440 static inline int
441 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
442 {
443 	int ret;
444 
445 	ret = rm_trace_export(list, export);
446 	ftrace_exports_disable(export);
447 
448 	return ret;
449 }
450 
451 int register_ftrace_export(struct trace_export *export)
452 {
453 	if (WARN_ON_ONCE(!export->write))
454 		return -1;
455 
456 	mutex_lock(&ftrace_export_lock);
457 
458 	add_ftrace_export(&ftrace_exports_list, export);
459 
460 	mutex_unlock(&ftrace_export_lock);
461 
462 	return 0;
463 }
464 EXPORT_SYMBOL_GPL(register_ftrace_export);
465 
466 int unregister_ftrace_export(struct trace_export *export)
467 {
468 	int ret;
469 
470 	mutex_lock(&ftrace_export_lock);
471 
472 	ret = rm_ftrace_export(&ftrace_exports_list, export);
473 
474 	mutex_unlock(&ftrace_export_lock);
475 
476 	return ret;
477 }
478 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
479 
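/*
 * A minimal trace_export client might look like the sketch below
 * (hypothetical names; see include/linux/trace.h for the exact
 * struct trace_export definition and write() callback signature):
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		forward the raw trace entry to a device, firmware log, etc.
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */
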
480 /* trace_flags holds trace_options default values */
481 #define TRACE_DEFAULT_FLAGS						\
482 	(FUNCTION_DEFAULT_FLAGS |					\
483 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
484 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
485 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
486 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
487 	 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK)
488 
489 /* trace_options that are only supported by global_trace */
490 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
491 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
492 
493 /* trace_flags that are default zero for instances */
494 #define ZEROED_TRACE_FLAGS \
495 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK)
496 
497 /*
498  * The global_trace is the descriptor that holds the top-level tracing
499  * buffers for the live tracing.
500  */
501 static struct trace_array global_trace = {
502 	.trace_flags = TRACE_DEFAULT_FLAGS,
503 };
504 
505 static struct trace_array *printk_trace = &global_trace;
506 
507 static __always_inline bool printk_binsafe(struct trace_array *tr)
508 {
509 	/*
510 	 * The binary format of trace_printk can cause a crash if used
511 	 * by a buffer from another boot. Force the use of the
512 	 * non-binary version of trace_printk if the trace_printk
513 	 * buffer is a boot-mapped ring buffer.
514 	 */
515 	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
516 }
517 
518 static void update_printk_trace(struct trace_array *tr)
519 {
520 	if (printk_trace == tr)
521 		return;
522 
523 	printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
524 	printk_trace = tr;
525 	tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
526 }
527 
528 void trace_set_ring_buffer_expanded(struct trace_array *tr)
529 {
530 	if (!tr)
531 		tr = &global_trace;
532 	tr->ring_buffer_expanded = true;
533 }
534 
535 LIST_HEAD(ftrace_trace_arrays);
536 
537 int trace_array_get(struct trace_array *this_tr)
538 {
539 	struct trace_array *tr;
540 
541 	guard(mutex)(&trace_types_lock);
542 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
543 		if (tr == this_tr) {
544 			tr->ref++;
545 			return 0;
546 		}
547 	}
548 
549 	return -ENODEV;
550 }
551 
552 static void __trace_array_put(struct trace_array *this_tr)
553 {
554 	WARN_ON(!this_tr->ref);
555 	this_tr->ref--;
556 }
557 
558 /**
559  * trace_array_put - Decrement the reference counter for this trace array.
560  * @this_tr : pointer to the trace array
561  *
562  * NOTE: Use this when we no longer need the trace array returned by
563  * trace_array_get_by_name(). This ensures the trace array can be later
564  * destroyed.
565  *
566  */
567 void trace_array_put(struct trace_array *this_tr)
568 {
569 	if (!this_tr)
570 		return;
571 
572 	mutex_lock(&trace_types_lock);
573 	__trace_array_put(this_tr);
574 	mutex_unlock(&trace_types_lock);
575 }
576 EXPORT_SYMBOL_GPL(trace_array_put);
577 
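/*
 * Typical module usage is to look an instance up, use it, and drop the
 * reference (a sketch only; "my_instance" is hypothetical and the exact
 * trace_array_get_by_name() signature differs between kernel versions):
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *	if (tr) {
 *		use tr: enable events, write markers, ...
 *		trace_array_put(tr);
 *	}
 */
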
578 int tracing_check_open_get_tr(struct trace_array *tr)
579 {
580 	int ret;
581 
582 	ret = security_locked_down(LOCKDOWN_TRACEFS);
583 	if (ret)
584 		return ret;
585 
586 	if (tracing_disabled)
587 		return -ENODEV;
588 
589 	if (tr && trace_array_get(tr) < 0)
590 		return -ENODEV;
591 
592 	return 0;
593 }
594 
595 int call_filter_check_discard(struct trace_event_call *call, void *rec,
596 			      struct trace_buffer *buffer,
597 			      struct ring_buffer_event *event)
598 {
599 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
600 	    !filter_match_preds(call->filter, rec)) {
601 		__trace_event_discard_commit(buffer, event);
602 		return 1;
603 	}
604 
605 	return 0;
606 }
607 
608 /**
609  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
610  * @filtered_pids: The list of pids to check
611  * @search_pid: The PID to find in @filtered_pids
612  *
613  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
614  */
615 bool
616 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
617 {
618 	return trace_pid_list_is_set(filtered_pids, search_pid);
619 }
620 
621 /**
622  * trace_ignore_this_task - should a task be ignored for tracing
623  * @filtered_pids: The list of pids to check
624  * @filtered_no_pids: The list of pids not to be traced
625  * @task: The task that should be ignored if not filtered
626  *
627  * Checks if @task should be traced or not from @filtered_pids.
628  * Returns true if @task should *NOT* be traced.
629  * Returns false if @task should be traced.
630  */
631 bool
632 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
633 		       struct trace_pid_list *filtered_no_pids,
634 		       struct task_struct *task)
635 {
636 	/*
637 	 * If filtered_no_pids is not empty, and the task's pid is listed
638 	 * in filtered_no_pids, then return true.
639 	 * Otherwise, if filtered_pids is empty, that means we can
640 	 * trace all tasks. If it has content, then only trace pids
641 	 * within filtered_pids.
642 	 */
643 
644 	return (filtered_pids &&
645 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
646 		(filtered_no_pids &&
647 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
648 }
649 
650 /**
651  * trace_filter_add_remove_task - Add or remove a task from a pid_list
652  * @pid_list: The list to modify
653  * @self: The current task for fork or NULL for exit
654  * @task: The task to add or remove
655  *
656  * If adding a task, if @self is defined, the task is only added if @self
657  * is also included in @pid_list. This happens on fork and tasks should
658  * only be added when the parent is listed. If @self is NULL, then the
659  * @task pid will be removed from the list, which would happen on exit
660  * of a task.
661  */
662 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
663 				  struct task_struct *self,
664 				  struct task_struct *task)
665 {
666 	if (!pid_list)
667 		return;
668 
669 	/* For forks, we only add if the forking task is listed */
670 	if (self) {
671 		if (!trace_find_filtered_pid(pid_list, self->pid))
672 			return;
673 	}
674 
675 	/* "self" is set for forks, and NULL for exits */
676 	if (self)
677 		trace_pid_list_set(pid_list, task->pid);
678 	else
679 		trace_pid_list_clear(pid_list, task->pid);
680 }
681 
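/*
 * Callers typically wire this helper up to the sched fork/exit
 * tracepoints, roughly (a simplified sketch of the calling convention
 * described above):
 *
 *	on fork: trace_filter_add_remove_task(pid_list, parent, child);
 *	on exit: trace_filter_add_remove_task(pid_list, NULL, task);
 */
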
682 /**
683  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
684  * @pid_list: The pid list to show
685  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
686  * @pos: The position of the file
687  *
688  * This is used by the seq_file "next" operation to iterate the pids
689  * listed in a trace_pid_list structure.
690  *
691  * Returns the pid+1 as we want to display pid of zero, but NULL would
692  * stop the iteration.
693  */
694 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
695 {
696 	long pid = (unsigned long)v;
697 	unsigned int next;
698 
699 	(*pos)++;
700 
701 	/* pid already is +1 of the actual previous bit */
702 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
703 		return NULL;
704 
705 	pid = next;
706 
707 	/* Return pid + 1 to allow zero to be represented */
708 	return (void *)(pid + 1);
709 }
710 
711 /**
712  * trace_pid_start - Used for seq_file to start reading pid lists
713  * @pid_list: The pid list to show
714  * @pos: The position of the file
715  *
716  * This is used by seq_file "start" operation to start the iteration
717  * of listing pids.
718  *
719  * Returns the pid+1 as we want to display pid of zero, but NULL would
720  * stop the iteration.
721  */
722 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
723 {
724 	unsigned long pid;
725 	unsigned int first;
726 	loff_t l = 0;
727 
728 	if (trace_pid_list_first(pid_list, &first) < 0)
729 		return NULL;
730 
731 	pid = first;
732 
733 	/* Return pid + 1 so that zero can be the exit value */
734 	for (pid++; pid && l < *pos;
735 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
736 		;
737 	return (void *)pid;
738 }
739 
740 /**
741  * trace_pid_show - show the current pid in seq_file processing
742  * @m: The seq_file structure to write into
743  * @v: A void pointer of the pid (+1) value to display
744  *
745  * Can be directly used by seq_file operations to display the current
746  * pid value.
747  */
748 int trace_pid_show(struct seq_file *m, void *v)
749 {
750 	unsigned long pid = (unsigned long)v - 1;
751 
752 	seq_printf(m, "%lu\n", pid);
753 	return 0;
754 }
755 
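/*
 * The three helpers above map directly onto seq_file iterator callbacks.
 * A simplified wiring might look like this (hypothetical names; real
 * users first look their pid_list up under RCU in the start/stop ops):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */
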
756 /* 128 should be much more than enough */
757 #define PID_BUF_SIZE		127
758 
759 int trace_pid_write(struct trace_pid_list *filtered_pids,
760 		    struct trace_pid_list **new_pid_list,
761 		    const char __user *ubuf, size_t cnt)
762 {
763 	struct trace_pid_list *pid_list;
764 	struct trace_parser parser;
765 	unsigned long val;
766 	int nr_pids = 0;
767 	ssize_t read = 0;
768 	ssize_t ret;
769 	loff_t pos;
770 	pid_t pid;
771 
772 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
773 		return -ENOMEM;
774 
775 	/*
776 	 * Always recreate a new array. The write is an all or nothing
777 	 * operation. Always create a new array when adding new pids by
778 	 * the user. If the operation fails, then the current list is
779 	 * not modified.
780 	 */
781 	pid_list = trace_pid_list_alloc();
782 	if (!pid_list) {
783 		trace_parser_put(&parser);
784 		return -ENOMEM;
785 	}
786 
787 	if (filtered_pids) {
788 		/* copy the current bits to the new max */
789 		ret = trace_pid_list_first(filtered_pids, &pid);
790 		while (!ret) {
791 			ret = trace_pid_list_set(pid_list, pid);
792 			if (ret < 0)
793 				goto out;
794 
795 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
796 			nr_pids++;
797 		}
798 	}
799 
800 	ret = 0;
801 	while (cnt > 0) {
802 
803 		pos = 0;
804 
805 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
806 		if (ret < 0)
807 			break;
808 
809 		read += ret;
810 		ubuf += ret;
811 		cnt -= ret;
812 
813 		if (!trace_parser_loaded(&parser))
814 			break;
815 
816 		ret = -EINVAL;
817 		if (kstrtoul(parser.buffer, 0, &val))
818 			break;
819 
820 		pid = (pid_t)val;
821 
822 		if (trace_pid_list_set(pid_list, pid) < 0) {
823 			ret = -1;
824 			break;
825 		}
826 		nr_pids++;
827 
828 		trace_parser_clear(&parser);
829 		ret = 0;
830 	}
831  out:
832 	trace_parser_put(&parser);
833 
834 	if (ret < 0) {
835 		trace_pid_list_free(pid_list);
836 		return ret;
837 	}
838 
839 	if (!nr_pids) {
840 		/* Cleared the list of pids */
841 		trace_pid_list_free(pid_list);
842 		pid_list = NULL;
843 	}
844 
845 	*new_pid_list = pid_list;
846 
847 	return read;
848 }
849 
850 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
851 {
852 	u64 ts;
853 
854 	/* Early boot up does not have a buffer yet */
855 	if (!buf->buffer)
856 		return trace_clock_local();
857 
858 	ts = ring_buffer_time_stamp(buf->buffer);
859 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
860 
861 	return ts;
862 }
863 
864 u64 ftrace_now(int cpu)
865 {
866 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
867 }
868 
869 /**
870  * tracing_is_enabled - Show if global_trace has been enabled
871  *
872  * Shows if the global trace has been enabled or not. It uses the
873  * mirror flag "buffer_disabled" to be used in fast paths such as for
874  * the irqsoff tracer. But it may be inaccurate due to races. If you
875  * need to know the accurate state, use tracing_is_on() which is a little
876  * slower, but accurate.
877  */
878 int tracing_is_enabled(void)
879 {
880 	/*
881 	 * For quick access (irqsoff uses this in fast path), just
882 	 * return the mirror variable of the state of the ring buffer.
883 	 * It's a little racy, but we don't really care.
884 	 */
885 	smp_rmb();
886 	return !global_trace.buffer_disabled;
887 }
888 
889 /*
890  * trace_buf_size is the size in bytes that is allocated
891  * for a buffer. Note, the number of bytes is always rounded
892  * to page size.
893  *
894  * This number is purposely set to a low number of 16384.
895  * If a dump on oops happens, it is much appreciated not to
896  * have to wait for all that output. Anyway, this is
897  * configurable at both boot time and run time.
898  */
899 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
900 
901 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
902 
903 /* trace_types holds a link list of available tracers. */
904 static struct tracer		*trace_types __read_mostly;
905 
906 /*
907  * trace_types_lock is used to protect the trace_types list.
908  */
909 DEFINE_MUTEX(trace_types_lock);
910 
911 /*
912  * Serialize access to the ring buffer.
913  *
914  * The ring buffer serializes readers, but that is only low-level protection.
915  * The validity of the events (returned by ring_buffer_peek(), etc.)
916  * is not protected by the ring buffer.
917  *
918  * The content of events may become garbage if we allow another process to
919  * consume these events concurrently:
920  *   A) the page of the consumed events may become a normal page
921  *      (not a reader page) in the ring buffer, and this page will be rewritten
922  *      by the events producer.
923  *   B) The page of the consumed events may become a page for splice_read,
924  *      and this page will be returned to the system.
925  *
926  * These primitives allow multiple processes to access different CPU ring buffers
927  * concurrently.
928  *
929  * These primitives don't distinguish read-only and read-consume access.
930  * Multiple read-only accesses are also serialized.
931  */
932 
933 #ifdef CONFIG_SMP
934 static DECLARE_RWSEM(all_cpu_access_lock);
935 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
936 
937 static inline void trace_access_lock(int cpu)
938 {
939 	if (cpu == RING_BUFFER_ALL_CPUS) {
940 		/* gain it for accessing the whole ring buffer. */
941 		down_write(&all_cpu_access_lock);
942 	} else {
943 		/* gain it for accessing a cpu ring buffer. */
944 
945 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
946 		down_read(&all_cpu_access_lock);
947 
948 		/* Secondly block other access to this @cpu ring buffer. */
949 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
950 	}
951 }
952 
953 static inline void trace_access_unlock(int cpu)
954 {
955 	if (cpu == RING_BUFFER_ALL_CPUS) {
956 		up_write(&all_cpu_access_lock);
957 	} else {
958 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
959 		up_read(&all_cpu_access_lock);
960 	}
961 }
962 
963 static inline void trace_access_lock_init(void)
964 {
965 	int cpu;
966 
967 	for_each_possible_cpu(cpu)
968 		mutex_init(&per_cpu(cpu_access_lock, cpu));
969 }
970 
971 #else
972 
973 static DEFINE_MUTEX(access_lock);
974 
975 static inline void trace_access_lock(int cpu)
976 {
977 	(void)cpu;
978 	mutex_lock(&access_lock);
979 }
980 
981 static inline void trace_access_unlock(int cpu)
982 {
983 	(void)cpu;
984 	mutex_unlock(&access_lock);
985 }
986 
987 static inline void trace_access_lock_init(void)
988 {
989 }
990 
991 #endif
992 
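/*
 * Readers bracket ring-buffer consumption with these helpers, e.g.
 * (simplified sketch; the local variables are placeholders):
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	...
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS locks out all per-CPU readers at once.
 */
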
993 #ifdef CONFIG_STACKTRACE
994 static void __ftrace_trace_stack(struct trace_array *tr,
995 				 struct trace_buffer *buffer,
996 				 unsigned int trace_ctx,
997 				 int skip, struct pt_regs *regs);
998 static inline void ftrace_trace_stack(struct trace_array *tr,
999 				      struct trace_buffer *buffer,
1000 				      unsigned int trace_ctx,
1001 				      int skip, struct pt_regs *regs);
1002 
1003 #else
1004 static inline void __ftrace_trace_stack(struct trace_array *tr,
1005 					struct trace_buffer *buffer,
1006 					unsigned int trace_ctx,
1007 					int skip, struct pt_regs *regs)
1008 {
1009 }
1010 static inline void ftrace_trace_stack(struct trace_array *tr,
1011 				      struct trace_buffer *buffer,
1012 				      unsigned long trace_ctx,
1013 				      int skip, struct pt_regs *regs)
1014 {
1015 }
1016 
1017 #endif
1018 
1019 static __always_inline void
1020 trace_event_setup(struct ring_buffer_event *event,
1021 		  int type, unsigned int trace_ctx)
1022 {
1023 	struct trace_entry *ent = ring_buffer_event_data(event);
1024 
1025 	tracing_generic_entry_update(ent, type, trace_ctx);
1026 }
1027 
1028 static __always_inline struct ring_buffer_event *
1029 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1030 			  int type,
1031 			  unsigned long len,
1032 			  unsigned int trace_ctx)
1033 {
1034 	struct ring_buffer_event *event;
1035 
1036 	event = ring_buffer_lock_reserve(buffer, len);
1037 	if (event != NULL)
1038 		trace_event_setup(event, type, trace_ctx);
1039 
1040 	return event;
1041 }
1042 
1043 void tracer_tracing_on(struct trace_array *tr)
1044 {
1045 	if (tr->array_buffer.buffer)
1046 		ring_buffer_record_on(tr->array_buffer.buffer);
1047 	/*
1048 	 * This flag is looked at when buffers haven't been allocated
1049 	 * yet, or by some tracers (like irqsoff), that just want to
1050 	 * know if the ring buffer has been disabled, but it can handle
1051 	 * races of where it gets disabled but we still do a record.
1052 	 * As the check is in the fast path of the tracers, it is more
1053 	 * important to be fast than accurate.
1054 	 */
1055 	tr->buffer_disabled = 0;
1056 	/* Make the flag seen by readers */
1057 	smp_wmb();
1058 }
1059 
1060 /**
1061  * tracing_on - enable tracing buffers
1062  *
1063  * This function enables tracing buffers that may have been
1064  * disabled with tracing_off.
1065  */
1066 void tracing_on(void)
1067 {
1068 	tracer_tracing_on(&global_trace);
1069 }
1070 
1071 
1072 static __always_inline void
1073 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1074 {
1075 	__this_cpu_write(trace_taskinfo_save, true);
1076 
1077 	/* If this is the temp buffer, we need to commit fully */
1078 	if (this_cpu_read(trace_buffered_event) == event) {
1079 		/* Length is in event->array[0] */
1080 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1081 		/* Release the temp buffer */
1082 		this_cpu_dec(trace_buffered_event_cnt);
1083 		/* ring_buffer_unlock_commit() enables preemption */
1084 		preempt_enable_notrace();
1085 	} else
1086 		ring_buffer_unlock_commit(buffer);
1087 }
1088 
1089 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1090 		       const char *str, int size)
1091 {
1092 	struct ring_buffer_event *event;
1093 	struct trace_buffer *buffer;
1094 	struct print_entry *entry;
1095 	unsigned int trace_ctx;
1096 	int alloc;
1097 
1098 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1099 		return 0;
1100 
1101 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1102 		return 0;
1103 
1104 	if (unlikely(tracing_disabled))
1105 		return 0;
1106 
1107 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1108 
1109 	trace_ctx = tracing_gen_ctx();
1110 	buffer = tr->array_buffer.buffer;
1111 	ring_buffer_nest_start(buffer);
1112 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1113 					    trace_ctx);
1114 	if (!event) {
1115 		size = 0;
1116 		goto out;
1117 	}
1118 
1119 	entry = ring_buffer_event_data(event);
1120 	entry->ip = ip;
1121 
1122 	memcpy(&entry->buf, str, size);
1123 
1124 	/* Add a newline if necessary */
1125 	if (entry->buf[size - 1] != '\n') {
1126 		entry->buf[size] = '\n';
1127 		entry->buf[size + 1] = '\0';
1128 	} else
1129 		entry->buf[size] = '\0';
1130 
1131 	__buffer_unlock_commit(buffer, event);
1132 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1133  out:
1134 	ring_buffer_nest_end(buffer);
1135 	return size;
1136 }
1137 EXPORT_SYMBOL_GPL(__trace_array_puts);
1138 
1139 /**
1140  * __trace_puts - write a constant string into the trace buffer.
1141  * @ip:	   The address of the caller
1142  * @str:   The constant string to write
1143  * @size:  The size of the string.
1144  */
1145 int __trace_puts(unsigned long ip, const char *str, int size)
1146 {
1147 	return __trace_array_puts(printk_trace, ip, str, size);
1148 }
1149 EXPORT_SYMBOL_GPL(__trace_puts);
1150 
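/*
 * Most callers use the trace_puts() macro rather than calling
 * __trace_puts() directly, e.g. (sketch):
 *
 *	trace_puts("reached the slow path\n");
 */
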
1151 /**
1152  * __trace_bputs - write the pointer to a constant string into trace buffer
1153  * @ip:	   The address of the caller
1154  * @str:   The constant string to write to the buffer to
1155  */
1156 int __trace_bputs(unsigned long ip, const char *str)
1157 {
1158 	struct trace_array *tr = READ_ONCE(printk_trace);
1159 	struct ring_buffer_event *event;
1160 	struct trace_buffer *buffer;
1161 	struct bputs_entry *entry;
1162 	unsigned int trace_ctx;
1163 	int size = sizeof(struct bputs_entry);
1164 	int ret = 0;
1165 
1166 	if (!printk_binsafe(tr))
1167 		return __trace_puts(ip, str, strlen(str));
1168 
1169 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1170 		return 0;
1171 
1172 	if (unlikely(tracing_selftest_running || tracing_disabled))
1173 		return 0;
1174 
1175 	trace_ctx = tracing_gen_ctx();
1176 	buffer = tr->array_buffer.buffer;
1177 
1178 	ring_buffer_nest_start(buffer);
1179 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1180 					    trace_ctx);
1181 	if (!event)
1182 		goto out;
1183 
1184 	entry = ring_buffer_event_data(event);
1185 	entry->ip			= ip;
1186 	entry->str			= str;
1187 
1188 	__buffer_unlock_commit(buffer, event);
1189 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1190 
1191 	ret = 1;
1192  out:
1193 	ring_buffer_nest_end(buffer);
1194 	return ret;
1195 }
1196 EXPORT_SYMBOL_GPL(__trace_bputs);
1197 
1198 #ifdef CONFIG_TRACER_SNAPSHOT
1199 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1200 					   void *cond_data)
1201 {
1202 	struct tracer *tracer = tr->current_trace;
1203 	unsigned long flags;
1204 
1205 	if (in_nmi()) {
1206 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1207 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1208 		return;
1209 	}
1210 
1211 	if (!tr->allocated_snapshot) {
1212 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1213 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1214 		tracer_tracing_off(tr);
1215 		return;
1216 	}
1217 
1218 	/* Note, snapshot can not be used when the tracer uses it */
1219 	if (tracer->use_max_tr) {
1220 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1221 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1222 		return;
1223 	}
1224 
1225 	if (tr->mapped) {
1226 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1227 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1228 		return;
1229 	}
1230 
1231 	local_irq_save(flags);
1232 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1233 	local_irq_restore(flags);
1234 }
1235 
1236 void tracing_snapshot_instance(struct trace_array *tr)
1237 {
1238 	tracing_snapshot_instance_cond(tr, NULL);
1239 }
1240 
1241 /**
1242  * tracing_snapshot - take a snapshot of the current buffer.
1243  *
1244  * This causes a swap between the snapshot buffer and the current live
1245  * tracing buffer. You can use this to take snapshots of the live
1246  * trace when some condition is triggered, but continue to trace.
1247  *
1248  * Note, make sure to allocate the snapshot with either
1249  * a tracing_snapshot_alloc(), or by doing it manually
1250  * with: echo 1 > /sys/kernel/tracing/snapshot
1251  *
1252  * If the snapshot buffer is not allocated, it will stop tracing.
1253  * Basically making a permanent snapshot.
1254  */
1255 void tracing_snapshot(void)
1256 {
1257 	struct trace_array *tr = &global_trace;
1258 
1259 	tracing_snapshot_instance(tr);
1260 }
1261 EXPORT_SYMBOL_GPL(tracing_snapshot);
1262 
1263 /**
1264  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1265  * @tr:		The tracing instance to snapshot
1266  * @cond_data:	The data to be tested conditionally, and possibly saved
1267  *
1268  * This is the same as tracing_snapshot() except that the snapshot is
1269  * conditional - the snapshot will only happen if the
1270  * cond_snapshot.update() implementation receiving the cond_data
1271  * returns true, which means that the trace array's cond_snapshot
1272  * update() operation used the cond_data to determine whether the
1273  * snapshot should be taken, and if it was, presumably saved it along
1274  * with the snapshot.
1275  */
1276 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1277 {
1278 	tracing_snapshot_instance_cond(tr, cond_data);
1279 }
1280 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1281 
1282 /**
1283  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1284  * @tr:		The tracing instance
1285  *
1286  * When the user enables a conditional snapshot using
1287  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1288  * with the snapshot.  This accessor is used to retrieve it.
1289  *
1290  * Should not be called from cond_snapshot.update(), since it takes
1291  * the tr->max_lock lock, which the code calling
1292  * cond_snapshot.update() has already done.
1293  *
1294  * Returns the cond_data associated with the trace array's snapshot.
1295  */
1296 void *tracing_cond_snapshot_data(struct trace_array *tr)
1297 {
1298 	void *cond_data = NULL;
1299 
1300 	local_irq_disable();
1301 	arch_spin_lock(&tr->max_lock);
1302 
1303 	if (tr->cond_snapshot)
1304 		cond_data = tr->cond_snapshot->cond_data;
1305 
1306 	arch_spin_unlock(&tr->max_lock);
1307 	local_irq_enable();
1308 
1309 	return cond_data;
1310 }
1311 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1312 
1313 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1314 					struct array_buffer *size_buf, int cpu_id);
1315 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1316 
1317 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1318 {
1319 	int order;
1320 	int ret;
1321 
1322 	if (!tr->allocated_snapshot) {
1323 
1324 		/* Make the snapshot buffer have the same order as main buffer */
1325 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1326 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1327 		if (ret < 0)
1328 			return ret;
1329 
1330 		/* allocate spare buffer */
1331 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1332 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1333 		if (ret < 0)
1334 			return ret;
1335 
1336 		tr->allocated_snapshot = true;
1337 	}
1338 
1339 	return 0;
1340 }
1341 
1342 static void free_snapshot(struct trace_array *tr)
1343 {
1344 	/*
1345 	 * We don't free the ring buffer; instead, we resize it because
1346 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1347 	 * we want to preserve it.
1348 	 */
1349 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1350 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1351 	set_buffer_entries(&tr->max_buffer, 1);
1352 	tracing_reset_online_cpus(&tr->max_buffer);
1353 	tr->allocated_snapshot = false;
1354 }
1355 
1356 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1357 {
1358 	int ret;
1359 
1360 	lockdep_assert_held(&trace_types_lock);
1361 
1362 	spin_lock(&tr->snapshot_trigger_lock);
1363 	if (tr->snapshot == UINT_MAX || tr->mapped) {
1364 		spin_unlock(&tr->snapshot_trigger_lock);
1365 		return -EBUSY;
1366 	}
1367 
1368 	tr->snapshot++;
1369 	spin_unlock(&tr->snapshot_trigger_lock);
1370 
1371 	ret = tracing_alloc_snapshot_instance(tr);
1372 	if (ret) {
1373 		spin_lock(&tr->snapshot_trigger_lock);
1374 		tr->snapshot--;
1375 		spin_unlock(&tr->snapshot_trigger_lock);
1376 	}
1377 
1378 	return ret;
1379 }
1380 
1381 int tracing_arm_snapshot(struct trace_array *tr)
1382 {
1383 	int ret;
1384 
1385 	mutex_lock(&trace_types_lock);
1386 	ret = tracing_arm_snapshot_locked(tr);
1387 	mutex_unlock(&trace_types_lock);
1388 
1389 	return ret;
1390 }
1391 
1392 void tracing_disarm_snapshot(struct trace_array *tr)
1393 {
1394 	spin_lock(&tr->snapshot_trigger_lock);
1395 	if (!WARN_ON(!tr->snapshot))
1396 		tr->snapshot--;
1397 	spin_unlock(&tr->snapshot_trigger_lock);
1398 }
1399 
1400 /**
1401  * tracing_alloc_snapshot - allocate snapshot buffer.
1402  *
1403  * This only allocates the snapshot buffer if it isn't already
1404  * allocated - it doesn't also take a snapshot.
1405  *
1406  * This is meant to be used in cases where the snapshot buffer needs
1407  * to be set up for events that can't sleep but need to be able to
1408  * trigger a snapshot.
1409  */
1410 int tracing_alloc_snapshot(void)
1411 {
1412 	struct trace_array *tr = &global_trace;
1413 	int ret;
1414 
1415 	ret = tracing_alloc_snapshot_instance(tr);
1416 	WARN_ON(ret < 0);
1417 
1418 	return ret;
1419 }
1420 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1421 
1422 /**
1423  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1424  *
1425  * This is similar to tracing_snapshot(), but it will allocate the
1426  * snapshot buffer if it isn't already allocated. Use this only
1427  * where it is safe to sleep, as the allocation may sleep.
1428  *
1429  * This causes a swap between the snapshot buffer and the current live
1430  * tracing buffer. You can use this to take snapshots of the live
1431  * trace when some condition is triggered, but continue to trace.
1432  */
1433 void tracing_snapshot_alloc(void)
1434 {
1435 	int ret;
1436 
1437 	ret = tracing_alloc_snapshot();
1438 	if (ret < 0)
1439 		return;
1440 
1441 	tracing_snapshot();
1442 }
1443 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1444 
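/*
 * A typical pattern (sketch): allocate the snapshot buffer once from a
 * sleepable context, then take snapshots from wherever the interesting
 * condition fires, even in atomic context:
 *
 *	tracing_snapshot_alloc();
 *	...
 *	if (interesting_condition)
 *		tracing_snapshot();
 */
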
1445 /**
1446  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1447  * @tr:		The tracing instance
1448  * @cond_data:	User data to associate with the snapshot
1449  * @update:	Implementation of the cond_snapshot update function
1450  *
1451  * Check whether the conditional snapshot for the given instance has
1452  * already been enabled, or if the current tracer is already using a
1453  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1454  * save the cond_data and update function inside.
1455  *
1456  * Returns 0 if successful, error otherwise.
1457  */
1458 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1459 				 cond_update_fn_t update)
1460 {
1461 	struct cond_snapshot *cond_snapshot __free(kfree) =
1462 		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1463 	int ret;
1464 
1465 	if (!cond_snapshot)
1466 		return -ENOMEM;
1467 
1468 	cond_snapshot->cond_data = cond_data;
1469 	cond_snapshot->update = update;
1470 
1471 	guard(mutex)(&trace_types_lock);
1472 
1473 	if (tr->current_trace->use_max_tr)
1474 		return -EBUSY;
1475 
1476 	/*
1477 	 * The cond_snapshot can only change to NULL without the
1478 	 * trace_types_lock. We don't care if we race with it going
1479 	 * to NULL, but we want to make sure that it's not set to
1480 	 * something other than NULL when we get here, which we can
1481 	 * do safely with only holding the trace_types_lock and not
1482 	 * having to take the max_lock.
1483 	 */
1484 	if (tr->cond_snapshot)
1485 		return -EBUSY;
1486 
1487 	ret = tracing_arm_snapshot_locked(tr);
1488 	if (ret)
1489 		return ret;
1490 
1491 	local_irq_disable();
1492 	arch_spin_lock(&tr->max_lock);
1493 	tr->cond_snapshot = no_free_ptr(cond_snapshot);
1494 	arch_spin_unlock(&tr->max_lock);
1495 	local_irq_enable();
1496 
1497 	return 0;
1498 }
1499 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1500 
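/*
 * Sketch of a conditional snapshot user (hypothetical names; the update
 * callback type is cond_update_fn_t, see include/linux/trace.h):
 *
 *	static struct my_state state;
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *s = cond_data;
 *
 *		return s->hit_threshold;	take the snapshot when true
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &state, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &state);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */
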
1501 /**
1502  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1503  * @tr:		The tracing instance
1504  *
1505  * Check whether the conditional snapshot for the given instance is
1506  * enabled; if so, free the cond_snapshot associated with it,
1507  * otherwise return -EINVAL.
1508  *
1509  * Returns 0 if successful, error otherwise.
1510  */
1511 int tracing_snapshot_cond_disable(struct trace_array *tr)
1512 {
1513 	int ret = 0;
1514 
1515 	local_irq_disable();
1516 	arch_spin_lock(&tr->max_lock);
1517 
1518 	if (!tr->cond_snapshot)
1519 		ret = -EINVAL;
1520 	else {
1521 		kfree(tr->cond_snapshot);
1522 		tr->cond_snapshot = NULL;
1523 	}
1524 
1525 	arch_spin_unlock(&tr->max_lock);
1526 	local_irq_enable();
1527 
1528 	tracing_disarm_snapshot(tr);
1529 
1530 	return ret;
1531 }
1532 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1533 #else
1534 void tracing_snapshot(void)
1535 {
1536 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1537 }
1538 EXPORT_SYMBOL_GPL(tracing_snapshot);
1539 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1540 {
1541 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1542 }
1543 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1544 int tracing_alloc_snapshot(void)
1545 {
1546 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1547 	return -ENODEV;
1548 }
1549 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1550 void tracing_snapshot_alloc(void)
1551 {
1552 	/* Give warning */
1553 	tracing_snapshot();
1554 }
1555 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1556 void *tracing_cond_snapshot_data(struct trace_array *tr)
1557 {
1558 	return NULL;
1559 }
1560 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1561 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1562 {
1563 	return -ENODEV;
1564 }
1565 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1566 int tracing_snapshot_cond_disable(struct trace_array *tr)
1567 {
1568 	return false;
1569 }
1570 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1571 #define free_snapshot(tr)	do { } while (0)
1572 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1573 #endif /* CONFIG_TRACER_SNAPSHOT */
1574 
1575 void tracer_tracing_off(struct trace_array *tr)
1576 {
1577 	if (tr->array_buffer.buffer)
1578 		ring_buffer_record_off(tr->array_buffer.buffer);
1579 	/*
1580 	 * This flag is looked at when buffers haven't been allocated
1581 	 * yet, or by some tracers (like irqsoff), that just want to
1582 	 * know if the ring buffer has been disabled, but it can handle
1583 	 * races of where it gets disabled but we still do a record.
1584 	 * As the check is in the fast path of the tracers, it is more
1585 	 * important to be fast than accurate.
1586 	 */
1587 	tr->buffer_disabled = 1;
1588 	/* Make the flag seen by readers */
1589 	smp_wmb();
1590 }
1591 
1592 /**
1593  * tracing_off - turn off tracing buffers
1594  *
1595  * This function stops the tracing buffers from recording data.
1596  * It does not disable any overhead the tracers themselves may
1597  * be causing. This function simply causes all recording to
1598  * the ring buffers to fail.
1599  */
1600 void tracing_off(void)
1601 {
1602 	tracer_tracing_off(&global_trace);
1603 }
1604 EXPORT_SYMBOL_GPL(tracing_off);
1605 
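/*
 * Together with tracing_on(), this lets kernel code capture just a region
 * of interest, e.g. (sketch, hypothetical function name):
 *
 *	tracing_on();
 *	do_something_interesting();
 *	tracing_off();
 */
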
1606 void disable_trace_on_warning(void)
1607 {
1608 	if (__disable_trace_on_warning) {
1609 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1610 			"Disabling tracing due to warning\n");
1611 		tracing_off();
1612 	}
1613 }
1614 
1615 /**
1616  * tracer_tracing_is_on - show real state of ring buffer enabled
1617  * @tr : the trace array to know if ring buffer is enabled
1618  *
1619  * Shows real state of the ring buffer if it is enabled or not.
1620  */
1621 bool tracer_tracing_is_on(struct trace_array *tr)
1622 {
1623 	if (tr->array_buffer.buffer)
1624 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1625 	return !tr->buffer_disabled;
1626 }
1627 
1628 /**
1629  * tracing_is_on - show state of ring buffers enabled
1630  */
1631 int tracing_is_on(void)
1632 {
1633 	return tracer_tracing_is_on(&global_trace);
1634 }
1635 
1636 static int __init set_buf_size(char *str)
1637 {
1638 	unsigned long buf_size;
1639 
1640 	if (!str)
1641 		return 0;
1642 	buf_size = memparse(str, &str);
1643 	/*
1644 	 * nr_entries can not be zero and the startup
1645 	 * tests require some buffer space. Therefore
1646 	 * ensure we have at least 4096 bytes of buffer.
1647 	 */
1648 	trace_buf_size = max(4096UL, buf_size);
1649 	return 1;
1650 }
1651 __setup("trace_buf_size=", set_buf_size);
1652 
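/*
 * memparse() accepts the usual size suffixes, so the per-CPU buffer size
 * can be given on the kernel command line as e.g. "trace_buf_size=1M"
 * or "trace_buf_size=4096k" (illustrative values).
 */
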
1653 static int __init set_tracing_thresh(char *str)
1654 {
1655 	unsigned long threshold;
1656 	int ret;
1657 
1658 	if (!str)
1659 		return 0;
1660 	ret = kstrtoul(str, 0, &threshold);
1661 	if (ret < 0)
1662 		return 0;
1663 	tracing_thresh = threshold * 1000;
1664 	return 1;
1665 }
1666 __setup("tracing_thresh=", set_tracing_thresh);
1667 
1668 unsigned long nsecs_to_usecs(unsigned long nsecs)
1669 {
1670 	return nsecs / 1000;
1671 }
1672 
1673 /*
1674  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1675  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1676  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1677  * of strings in the order that the evals (enum) were defined.
1678  */
1679 #undef C
1680 #define C(a, b) b
1681 
1682 /* These must match the bit positions in trace_iterator_flags */
1683 static const char *trace_options[] = {
1684 	TRACE_FLAGS
1685 	NULL
1686 };
1687 
1688 static struct {
1689 	u64 (*func)(void);
1690 	const char *name;
1691 	int in_ns;		/* is this clock in nanoseconds? */
1692 } trace_clocks[] = {
1693 	{ trace_clock_local,		"local",	1 },
1694 	{ trace_clock_global,		"global",	1 },
1695 	{ trace_clock_counter,		"counter",	0 },
1696 	{ trace_clock_jiffies,		"uptime",	0 },
1697 	{ trace_clock,			"perf",		1 },
1698 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1699 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1700 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1701 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1702 	ARCH_TRACE_CLOCKS
1703 };
1704 
1705 bool trace_clock_in_ns(struct trace_array *tr)
1706 {
1707 	if (trace_clocks[tr->clock_id].in_ns)
1708 		return true;
1709 
1710 	return false;
1711 }
1712 
1713 /*
1714  * trace_parser_get_init - gets the buffer for trace parser
1715  */
1716 int trace_parser_get_init(struct trace_parser *parser, int size)
1717 {
1718 	memset(parser, 0, sizeof(*parser));
1719 
1720 	parser->buffer = kmalloc(size, GFP_KERNEL);
1721 	if (!parser->buffer)
1722 		return 1;
1723 
1724 	parser->size = size;
1725 	return 0;
1726 }
1727 
1728 /*
1729  * trace_parser_put - frees the buffer for the trace parser
1730  */
1731 void trace_parser_put(struct trace_parser *parser)
1732 {
1733 	kfree(parser->buffer);
1734 	parser->buffer = NULL;
1735 }
1736 
1737 /*
1738  * trace_get_user - reads the user input string separated by space
1739  * (matched by isspace(ch))
1740  *
1741  * For each string found, the 'struct trace_parser' is updated
1742  * and the function returns.
1743  *
1744  * Returns number of bytes read.
1745  *
1746  * See kernel/trace/trace.h for 'struct trace_parser' details.
1747  */
1748 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1749 	size_t cnt, loff_t *ppos)
1750 {
1751 	char ch;
1752 	size_t read = 0;
1753 	ssize_t ret;
1754 
1755 	if (!*ppos)
1756 		trace_parser_clear(parser);
1757 
1758 	ret = get_user(ch, ubuf++);
1759 	if (ret)
1760 		goto fail;
1761 
1762 	read++;
1763 	cnt--;
1764 
1765 	/*
1766 	 * The parser is not finished with the last write,
1767 	 * continue reading the user input without skipping spaces.
1768 	 */
1769 	if (!parser->cont) {
1770 		/* skip white space */
1771 		while (cnt && isspace(ch)) {
1772 			ret = get_user(ch, ubuf++);
1773 			if (ret)
1774 				goto fail;
1775 			read++;
1776 			cnt--;
1777 		}
1778 
1779 		parser->idx = 0;
1780 
1781 		/* only spaces were written */
1782 		if (isspace(ch) || !ch) {
1783 			*ppos += read;
1784 			return read;
1785 		}
1786 	}
1787 
1788 	/* read the non-space input */
1789 	while (cnt && !isspace(ch) && ch) {
1790 		if (parser->idx < parser->size - 1)
1791 			parser->buffer[parser->idx++] = ch;
1792 		else {
1793 			ret = -EINVAL;
1794 			goto fail;
1795 		}
1796 
1797 		ret = get_user(ch, ubuf++);
1798 		if (ret)
1799 			goto fail;
1800 		read++;
1801 		cnt--;
1802 	}
1803 
1804 	/* We either got finished input or we have to wait for another call. */
1805 	if (isspace(ch) || !ch) {
1806 		parser->buffer[parser->idx] = 0;
1807 		parser->cont = false;
1808 	} else if (parser->idx < parser->size - 1) {
1809 		parser->cont = true;
1810 		parser->buffer[parser->idx++] = ch;
1811 		/* Make sure the parsed string always terminates with '\0'. */
1812 		parser->buffer[parser->idx] = 0;
1813 	} else {
1814 		ret = -EINVAL;
1815 		goto fail;
1816 	}
1817 
1818 	*ppos += read;
1819 	return read;
1820 fail:
1821 	trace_parser_fail(parser);
1822 	return ret;
1823 }
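/*
 * Behavioural sketch of the parser above (illustrative): if user space
 * writes "func1 func2", the first call fills parser->buffer with "func1"
 * and the second with "func2". A token that is cut off by the end of a
 * write() is carried over via parser->cont and completed on the next call.
 */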
1824 
1825 /* TODO add a seq_buf_to_buffer() */
1826 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1827 {
1828 	int len;
1829 
1830 	if (trace_seq_used(s) <= s->readpos)
1831 		return -EBUSY;
1832 
1833 	len = trace_seq_used(s) - s->readpos;
1834 	if (cnt > len)
1835 		cnt = len;
1836 	memcpy(buf, s->buffer + s->readpos, cnt);
1837 
1838 	s->readpos += cnt;
1839 	return cnt;
1840 }
1841 
1842 unsigned long __read_mostly	tracing_thresh;
1843 
1844 #ifdef CONFIG_TRACER_MAX_TRACE
1845 static const struct file_operations tracing_max_lat_fops;
1846 
1847 #ifdef LATENCY_FS_NOTIFY
1848 
1849 static struct workqueue_struct *fsnotify_wq;
1850 
1851 static void latency_fsnotify_workfn(struct work_struct *work)
1852 {
1853 	struct trace_array *tr = container_of(work, struct trace_array,
1854 					      fsnotify_work);
1855 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1856 }
1857 
1858 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1859 {
1860 	struct trace_array *tr = container_of(iwork, struct trace_array,
1861 					      fsnotify_irqwork);
1862 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1863 }
1864 
1865 static void trace_create_maxlat_file(struct trace_array *tr,
1866 				     struct dentry *d_tracer)
1867 {
1868 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1869 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1870 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1871 					      TRACE_MODE_WRITE,
1872 					      d_tracer, tr,
1873 					      &tracing_max_lat_fops);
1874 }
1875 
1876 __init static int latency_fsnotify_init(void)
1877 {
1878 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1879 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1880 	if (!fsnotify_wq) {
1881 		pr_err("Unable to allocate tr_max_lat_wq\n");
1882 		return -ENOMEM;
1883 	}
1884 	return 0;
1885 }
1886 
1887 late_initcall_sync(latency_fsnotify_init);
1888 
1889 void latency_fsnotify(struct trace_array *tr)
1890 {
1891 	if (!fsnotify_wq)
1892 		return;
1893 	/*
1894 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1895 	 * possible that we are called from __schedule() or do_idle(), which
1896 	 * could cause a deadlock.
1897 	 */
1898 	irq_work_queue(&tr->fsnotify_irqwork);
1899 }
1900 
1901 #else /* !LATENCY_FS_NOTIFY */
1902 
1903 #define trace_create_maxlat_file(tr, d_tracer)				\
1904 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1905 			  d_tracer, tr, &tracing_max_lat_fops)
1906 
1907 #endif
1908 
1909 /*
1910  * Copy the new maximum trace into the separate maximum-trace
1911  * structure. (this way the maximum trace is permanently saved,
1912  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1913  */
1914 static void
1915 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1916 {
1917 	struct array_buffer *trace_buf = &tr->array_buffer;
1918 	struct array_buffer *max_buf = &tr->max_buffer;
1919 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1920 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1921 
1922 	max_buf->cpu = cpu;
1923 	max_buf->time_start = data->preempt_timestamp;
1924 
1925 	max_data->saved_latency = tr->max_latency;
1926 	max_data->critical_start = data->critical_start;
1927 	max_data->critical_end = data->critical_end;
1928 
1929 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1930 	max_data->pid = tsk->pid;
1931 	/*
1932 	 * If tsk == current, then use current_uid(), as that does not use
1933 	 * RCU. The irq tracer can be called out of RCU scope.
1934 	 */
1935 	if (tsk == current)
1936 		max_data->uid = current_uid();
1937 	else
1938 		max_data->uid = task_uid(tsk);
1939 
1940 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1941 	max_data->policy = tsk->policy;
1942 	max_data->rt_priority = tsk->rt_priority;
1943 
1944 	/* record this tasks comm */
1945 	tracing_record_cmdline(tsk);
1946 	latency_fsnotify(tr);
1947 }
1948 
1949 /**
1950  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1951  * @tr: tracer
1952  * @tsk: the task with the latency
1953  * @cpu: The cpu that initiated the trace.
1954  * @cond_data: User data associated with a conditional snapshot
1955  *
1956  * Flip the buffers between the @tr and the max_tr and record information
1957  * about which task was the cause of this latency.
1958  */
1959 void
1960 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1961 	      void *cond_data)
1962 {
1963 	if (tr->stop_count)
1964 		return;
1965 
1966 	WARN_ON_ONCE(!irqs_disabled());
1967 
1968 	if (!tr->allocated_snapshot) {
1969 		/* Only the nop tracer should hit this when disabling */
1970 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1971 		return;
1972 	}
1973 
1974 	arch_spin_lock(&tr->max_lock);
1975 
1976 	/* Inherit the recordable setting from array_buffer */
1977 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1978 		ring_buffer_record_on(tr->max_buffer.buffer);
1979 	else
1980 		ring_buffer_record_off(tr->max_buffer.buffer);
1981 
1982 #ifdef CONFIG_TRACER_SNAPSHOT
1983 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1984 		arch_spin_unlock(&tr->max_lock);
1985 		return;
1986 	}
1987 #endif
1988 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1989 
1990 	__update_max_tr(tr, tsk, cpu);
1991 
1992 	arch_spin_unlock(&tr->max_lock);
1993 
1994 	/* Any waiters on the old snapshot buffer need to wake up */
1995 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1996 }
1997 
1998 /**
1999  * update_max_tr_single - only copy one trace over, and reset the rest
2000  * @tr: tracer
2001  * @tsk: task with the latency
2002  * @cpu: the cpu of the buffer to copy.
2003  *
2004  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2005  */
2006 void
2007 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2008 {
2009 	int ret;
2010 
2011 	if (tr->stop_count)
2012 		return;
2013 
2014 	WARN_ON_ONCE(!irqs_disabled());
2015 	if (!tr->allocated_snapshot) {
2016 		/* Only the nop tracer should hit this when disabling */
2017 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2018 		return;
2019 	}
2020 
2021 	arch_spin_lock(&tr->max_lock);
2022 
2023 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2024 
2025 	if (ret == -EBUSY) {
2026 		/*
2027 		 * We failed to swap the buffer due to a commit taking
2028 		 * place on this CPU. We fail to record, but we reset
2029 		 * the max trace buffer (no one writes directly to it)
2030 		 * and flag that it failed.
2031 		 * Another reason this can fail is that a resize is in progress.
2032 		 */
2033 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2034 			"Failed to swap buffers due to commit or resize in progress\n");
2035 	}
2036 
2037 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2038 
2039 	__update_max_tr(tr, tsk, cpu);
2040 	arch_spin_unlock(&tr->max_lock);
2041 }
2042 
2043 #endif /* CONFIG_TRACER_MAX_TRACE */
2044 
2045 struct pipe_wait {
2046 	struct trace_iterator		*iter;
2047 	int				wait_index;
2048 };
2049 
2050 static bool wait_pipe_cond(void *data)
2051 {
2052 	struct pipe_wait *pwait = data;
2053 	struct trace_iterator *iter = pwait->iter;
2054 
2055 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2056 		return true;
2057 
2058 	return iter->closed;
2059 }
2060 
2061 static int wait_on_pipe(struct trace_iterator *iter, int full)
2062 {
2063 	struct pipe_wait pwait;
2064 	int ret;
2065 
2066 	/* Iterators are static, they should be filled or empty */
2067 	if (trace_buffer_iter(iter, iter->cpu_file))
2068 		return 0;
2069 
2070 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2071 	pwait.iter = iter;
2072 
2073 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2074 			       wait_pipe_cond, &pwait);
2075 
2076 #ifdef CONFIG_TRACER_MAX_TRACE
2077 	/*
2078 	 * Make sure this is still the snapshot buffer, as if a snapshot were
2079 	 * to happen, this would now be the main buffer.
2080 	 */
2081 	if (iter->snapshot)
2082 		iter->array_buffer = &iter->tr->max_buffer;
2083 #endif
2084 	return ret;
2085 }
2086 
2087 #ifdef CONFIG_FTRACE_STARTUP_TEST
2088 static bool selftests_can_run;
2089 
2090 struct trace_selftests {
2091 	struct list_head		list;
2092 	struct tracer			*type;
2093 };
2094 
2095 static LIST_HEAD(postponed_selftests);
2096 
2097 static int save_selftest(struct tracer *type)
2098 {
2099 	struct trace_selftests *selftest;
2100 
2101 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2102 	if (!selftest)
2103 		return -ENOMEM;
2104 
2105 	selftest->type = type;
2106 	list_add(&selftest->list, &postponed_selftests);
2107 	return 0;
2108 }
2109 
2110 static int run_tracer_selftest(struct tracer *type)
2111 {
2112 	struct trace_array *tr = &global_trace;
2113 	struct tracer *saved_tracer = tr->current_trace;
2114 	int ret;
2115 
2116 	if (!type->selftest || tracing_selftest_disabled)
2117 		return 0;
2118 
2119 	/*
2120 	 * If a tracer registers early in boot up (before scheduling is
2121 	 * initialized and such), then do not run its selftests yet.
2122 	 * Instead, run it a little later in the boot process.
2123 	 */
2124 	if (!selftests_can_run)
2125 		return save_selftest(type);
2126 
2127 	if (!tracing_is_on()) {
2128 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2129 			type->name);
2130 		return 0;
2131 	}
2132 
2133 	/*
2134 	 * Run a selftest on this tracer.
2135 	 * Here we reset the trace buffer, and set the current
2136 	 * tracer to be this tracer. The tracer can then run some
2137 	 * internal tracing to verify that everything is in order.
2138 	 * If we fail, we do not register this tracer.
2139 	 */
2140 	tracing_reset_online_cpus(&tr->array_buffer);
2141 
2142 	tr->current_trace = type;
2143 
2144 #ifdef CONFIG_TRACER_MAX_TRACE
2145 	if (type->use_max_tr) {
2146 		/* If we expanded the buffers, make sure the max is expanded too */
2147 		if (tr->ring_buffer_expanded)
2148 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2149 					   RING_BUFFER_ALL_CPUS);
2150 		tr->allocated_snapshot = true;
2151 	}
2152 #endif
2153 
2154 	/* the test is responsible for initializing and enabling */
2155 	pr_info("Testing tracer %s: ", type->name);
2156 	ret = type->selftest(type, tr);
2157 	/* the test is responsible for resetting too */
2158 	tr->current_trace = saved_tracer;
2159 	if (ret) {
2160 		printk(KERN_CONT "FAILED!\n");
2161 		/* Add the warning after printing 'FAILED' */
2162 		WARN_ON(1);
2163 		return -1;
2164 	}
2165 	/* Only reset on passing, to avoid touching corrupted buffers */
2166 	tracing_reset_online_cpus(&tr->array_buffer);
2167 
2168 #ifdef CONFIG_TRACER_MAX_TRACE
2169 	if (type->use_max_tr) {
2170 		tr->allocated_snapshot = false;
2171 
2172 		/* Shrink the max buffer again */
2173 		if (tr->ring_buffer_expanded)
2174 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2175 					   RING_BUFFER_ALL_CPUS);
2176 	}
2177 #endif
2178 
2179 	printk(KERN_CONT "PASSED\n");
2180 	return 0;
2181 }
2182 
2183 static int do_run_tracer_selftest(struct tracer *type)
2184 {
2185 	int ret;
2186 
2187 	/*
2188 	 * Tests can take a long time, especially if they are run one after the
2189 	 * other, as does happen during bootup when all the tracers are
2190 	 * registered. This could cause the soft lockup watchdog to trigger.
2191 	 */
2192 	cond_resched();
2193 
2194 	tracing_selftest_running = true;
2195 	ret = run_tracer_selftest(type);
2196 	tracing_selftest_running = false;
2197 
2198 	return ret;
2199 }
2200 
2201 static __init int init_trace_selftests(void)
2202 {
2203 	struct trace_selftests *p, *n;
2204 	struct tracer *t, **last;
2205 	int ret;
2206 
2207 	selftests_can_run = true;
2208 
2209 	guard(mutex)(&trace_types_lock);
2210 
2211 	if (list_empty(&postponed_selftests))
2212 		return 0;
2213 
2214 	pr_info("Running postponed tracer tests:\n");
2215 
2216 	tracing_selftest_running = true;
2217 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2218 		/* This loop can take minutes when sanitizers are enabled, so
2219 		 * let's make sure we allow RCU processing.
2220 		 */
2221 		cond_resched();
2222 		ret = run_tracer_selftest(p->type);
2223 		/* If the test fails, then warn and remove from available_tracers */
2224 		if (ret < 0) {
2225 			WARN(1, "tracer: %s failed selftest, disabling\n",
2226 			     p->type->name);
2227 			last = &trace_types;
2228 			for (t = trace_types; t; t = t->next) {
2229 				if (t == p->type) {
2230 					*last = t->next;
2231 					break;
2232 				}
2233 				last = &t->next;
2234 			}
2235 		}
2236 		list_del(&p->list);
2237 		kfree(p);
2238 	}
2239 	tracing_selftest_running = false;
2240 
2241 	return 0;
2242 }
2243 core_initcall(init_trace_selftests);
2244 #else
2245 static inline int do_run_tracer_selftest(struct tracer *type)
2246 {
2247 	return 0;
2248 }
2249 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2250 
2251 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2252 
2253 static void __init apply_trace_boot_options(void);
2254 
2255 /**
2256  * register_tracer - register a tracer with the ftrace system.
2257  * @type: the plugin for the tracer
2258  *
2259  * Register a new plugin tracer.
2260  */
2261 int __init register_tracer(struct tracer *type)
2262 {
2263 	struct tracer *t;
2264 	int ret = 0;
2265 
2266 	if (!type->name) {
2267 		pr_info("Tracer must have a name\n");
2268 		return -1;
2269 	}
2270 
2271 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2272 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2273 		return -1;
2274 	}
2275 
2276 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2277 		pr_warn("Can not register tracer %s due to lockdown\n",
2278 			   type->name);
2279 		return -EPERM;
2280 	}
2281 
2282 	mutex_lock(&trace_types_lock);
2283 
2284 	for (t = trace_types; t; t = t->next) {
2285 		if (strcmp(type->name, t->name) == 0) {
2286 			/* already found */
2287 			pr_info("Tracer %s already registered\n",
2288 				type->name);
2289 			ret = -1;
2290 			goto out;
2291 		}
2292 	}
2293 
2294 	if (!type->set_flag)
2295 		type->set_flag = &dummy_set_flag;
2296 	if (!type->flags) {
2297 		/* allocate a dummy tracer_flags */
2298 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2299 		if (!type->flags) {
2300 			ret = -ENOMEM;
2301 			goto out;
2302 		}
2303 		type->flags->val = 0;
2304 		type->flags->opts = dummy_tracer_opt;
2305 	} else
2306 		if (!type->flags->opts)
2307 			type->flags->opts = dummy_tracer_opt;
2308 
2309 	/* store the tracer for __set_tracer_option */
2310 	type->flags->trace = type;
2311 
2312 	ret = do_run_tracer_selftest(type);
2313 	if (ret < 0)
2314 		goto out;
2315 
2316 	type->next = trace_types;
2317 	trace_types = type;
2318 	add_tracer_options(&global_trace, type);
2319 
2320  out:
2321 	mutex_unlock(&trace_types_lock);
2322 
2323 	if (ret || !default_bootup_tracer)
2324 		return ret;
2325 
2326 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2327 		return 0;
2328 
2329 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2330 	/* Do we want this tracer to start on bootup? */
2331 	tracing_set_tracer(&global_trace, type->name);
2332 	default_bootup_tracer = NULL;
2333 
2334 	apply_trace_boot_options();
2335 
2336 	/* disable other selftests, since this will break it. */
2337 	disable_tracing_selftest("running a tracer");
2338 
2339 	return 0;
2340 }
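/*
 * Minimal registration sketch (hypothetical tracer; most callbacks and
 * fields omitted):
 *
 *	static struct tracer foo_tracer __read_mostly = {
 *		.name	= "foo",
 *		.init	= foo_tracer_init,
 *		.reset	= foo_tracer_reset,
 *	};
 *
 *	static __init int init_foo_tracer(void)
 *	{
 *		return register_tracer(&foo_tracer);
 *	}
 *	core_initcall(init_foo_tracer);
 */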
2341 
2342 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2343 {
2344 	struct trace_buffer *buffer = buf->buffer;
2345 
2346 	if (!buffer)
2347 		return;
2348 
2349 	ring_buffer_record_disable(buffer);
2350 
2351 	/* Make sure all commits have finished */
2352 	synchronize_rcu();
2353 	ring_buffer_reset_cpu(buffer, cpu);
2354 
2355 	ring_buffer_record_enable(buffer);
2356 }
2357 
2358 void tracing_reset_online_cpus(struct array_buffer *buf)
2359 {
2360 	struct trace_buffer *buffer = buf->buffer;
2361 
2362 	if (!buffer)
2363 		return;
2364 
2365 	ring_buffer_record_disable(buffer);
2366 
2367 	/* Make sure all commits have finished */
2368 	synchronize_rcu();
2369 
2370 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2371 
2372 	ring_buffer_reset_online_cpus(buffer);
2373 
2374 	ring_buffer_record_enable(buffer);
2375 }
2376 
2377 static void tracing_reset_all_cpus(struct array_buffer *buf)
2378 {
2379 	struct trace_buffer *buffer = buf->buffer;
2380 
2381 	if (!buffer)
2382 		return;
2383 
2384 	ring_buffer_record_disable(buffer);
2385 
2386 	/* Make sure all commits have finished */
2387 	synchronize_rcu();
2388 
2389 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2390 
2391 	ring_buffer_reset(buffer);
2392 
2393 	ring_buffer_record_enable(buffer);
2394 }
2395 
2396 /* Must have trace_types_lock held */
2397 void tracing_reset_all_online_cpus_unlocked(void)
2398 {
2399 	struct trace_array *tr;
2400 
2401 	lockdep_assert_held(&trace_types_lock);
2402 
2403 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2404 		if (!tr->clear_trace)
2405 			continue;
2406 		tr->clear_trace = false;
2407 		tracing_reset_online_cpus(&tr->array_buffer);
2408 #ifdef CONFIG_TRACER_MAX_TRACE
2409 		tracing_reset_online_cpus(&tr->max_buffer);
2410 #endif
2411 	}
2412 }
2413 
2414 void tracing_reset_all_online_cpus(void)
2415 {
2416 	mutex_lock(&trace_types_lock);
2417 	tracing_reset_all_online_cpus_unlocked();
2418 	mutex_unlock(&trace_types_lock);
2419 }
2420 
2421 int is_tracing_stopped(void)
2422 {
2423 	return global_trace.stop_count;
2424 }
2425 
2426 static void tracing_start_tr(struct trace_array *tr)
2427 {
2428 	struct trace_buffer *buffer;
2429 	unsigned long flags;
2430 
2431 	if (tracing_disabled)
2432 		return;
2433 
2434 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2435 	if (--tr->stop_count) {
2436 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2437 			/* Someone screwed up their debugging */
2438 			tr->stop_count = 0;
2439 		}
2440 		goto out;
2441 	}
2442 
2443 	/* Prevent the buffers from switching */
2444 	arch_spin_lock(&tr->max_lock);
2445 
2446 	buffer = tr->array_buffer.buffer;
2447 	if (buffer)
2448 		ring_buffer_record_enable(buffer);
2449 
2450 #ifdef CONFIG_TRACER_MAX_TRACE
2451 	buffer = tr->max_buffer.buffer;
2452 	if (buffer)
2453 		ring_buffer_record_enable(buffer);
2454 #endif
2455 
2456 	arch_spin_unlock(&tr->max_lock);
2457 
2458  out:
2459 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2460 }
2461 
2462 /**
2463  * tracing_start - quick start of the tracer
2464  *
2465  * If tracing is enabled but was stopped by tracing_stop,
2466  * this will start the tracer back up.
2467  */
2468 void tracing_start(void)
2469 
2470 {
2471 	return tracing_start_tr(&global_trace);
2472 }
2473 
2474 static void tracing_stop_tr(struct trace_array *tr)
2475 {
2476 	struct trace_buffer *buffer;
2477 	unsigned long flags;
2478 
2479 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2480 	if (tr->stop_count++)
2481 		goto out;
2482 
2483 	/* Prevent the buffers from switching */
2484 	arch_spin_lock(&tr->max_lock);
2485 
2486 	buffer = tr->array_buffer.buffer;
2487 	if (buffer)
2488 		ring_buffer_record_disable(buffer);
2489 
2490 #ifdef CONFIG_TRACER_MAX_TRACE
2491 	buffer = tr->max_buffer.buffer;
2492 	if (buffer)
2493 		ring_buffer_record_disable(buffer);
2494 #endif
2495 
2496 	arch_spin_unlock(&tr->max_lock);
2497 
2498  out:
2499 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2500 }
2501 
2502 /**
2503  * tracing_stop - quick stop of the tracer
2504  *
2505  * Lightweight way to stop tracing. Use in conjunction with
2506  * tracing_start.
2507  */
2508 void tracing_stop(void)
2509 {
2510 	return tracing_stop_tr(&global_trace);
2511 }
2512 
2513 /*
2514  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2515  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2516  * simplifies those functions and keeps them in sync.
2517  */
2518 enum print_line_t trace_handle_return(struct trace_seq *s)
2519 {
2520 	return trace_seq_has_overflowed(s) ?
2521 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2522 }
2523 EXPORT_SYMBOL_GPL(trace_handle_return);
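/*
 * Typical use in an event output callback (minimal sketch; trace_foo_print
 * and its event are hypothetical):
 *
 *	static enum print_line_t trace_foo_print(struct trace_iterator *iter,
 *						 int flags, struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "foo fired\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */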
2524 
2525 static unsigned short migration_disable_value(void)
2526 {
2527 #if defined(CONFIG_SMP)
2528 	return current->migration_disabled;
2529 #else
2530 	return 0;
2531 #endif
2532 }
2533 
2534 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2535 {
2536 	unsigned int trace_flags = irqs_status;
2537 	unsigned int pc;
2538 
2539 	pc = preempt_count();
2540 
2541 	if (pc & NMI_MASK)
2542 		trace_flags |= TRACE_FLAG_NMI;
2543 	if (pc & HARDIRQ_MASK)
2544 		trace_flags |= TRACE_FLAG_HARDIRQ;
2545 	if (in_serving_softirq())
2546 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2547 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2548 		trace_flags |= TRACE_FLAG_BH_OFF;
2549 
2550 	if (tif_need_resched())
2551 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2552 	if (test_preempt_need_resched())
2553 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2554 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2555 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2556 }
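/*
 * Layout of the value returned above:
 *
 *	bits  0- 3: preempt count (clamped to 0xf)
 *	bits  4- 7: migration-disable depth (clamped to 0xf)
 *	bits 16+  : TRACE_FLAG_* bits (irq/softirq/NMI/resched state)
 */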
2557 
2558 struct ring_buffer_event *
2559 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2560 			  int type,
2561 			  unsigned long len,
2562 			  unsigned int trace_ctx)
2563 {
2564 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2565 }
2566 
2567 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2568 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2569 static int trace_buffered_event_ref;
2570 
2571 /**
2572  * trace_buffered_event_enable - enable buffering events
2573  *
2574  * When events are being filtered, it is quicker to use a temporary
2575  * buffer to write the event data into if there's a likely chance
2576  * that it will not be committed. The discard of the ring buffer
2577  * is not as fast as committing, and is much slower than copying
2578  * a commit.
2579  *
2580  * When an event is to be filtered, allocate per cpu buffers to
2581  * write the event data into, and if the event is filtered and discarded
2582  * it is simply dropped, otherwise, the entire data is to be committed
2583  * in one shot.
2584  */
2585 void trace_buffered_event_enable(void)
2586 {
2587 	struct ring_buffer_event *event;
2588 	struct page *page;
2589 	int cpu;
2590 
2591 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2592 
2593 	if (trace_buffered_event_ref++)
2594 		return;
2595 
2596 	for_each_tracing_cpu(cpu) {
2597 		page = alloc_pages_node(cpu_to_node(cpu),
2598 					GFP_KERNEL | __GFP_NORETRY, 0);
2599 		/* This is just an optimization and can handle failures */
2600 		if (!page) {
2601 			pr_err("Failed to allocate event buffer\n");
2602 			break;
2603 		}
2604 
2605 		event = page_address(page);
2606 		memset(event, 0, sizeof(*event));
2607 
2608 		per_cpu(trace_buffered_event, cpu) = event;
2609 
2610 		preempt_disable();
2611 		if (cpu == smp_processor_id() &&
2612 		    __this_cpu_read(trace_buffered_event) !=
2613 		    per_cpu(trace_buffered_event, cpu))
2614 			WARN_ON_ONCE(1);
2615 		preempt_enable();
2616 	}
2617 }
2618 
2619 static void enable_trace_buffered_event(void *data)
2620 {
2621 	/* Probably not needed, but do it anyway */
2622 	smp_rmb();
2623 	this_cpu_dec(trace_buffered_event_cnt);
2624 }
2625 
2626 static void disable_trace_buffered_event(void *data)
2627 {
2628 	this_cpu_inc(trace_buffered_event_cnt);
2629 }
2630 
2631 /**
2632  * trace_buffered_event_disable - disable buffering events
2633  *
2634  * When a filter is removed, it is faster to not use the buffered
2635  * events, and to commit directly into the ring buffer. Free up
2636  * the temp buffers when there are no more users. This requires
2637  * special synchronization with current events.
2638  */
2639 void trace_buffered_event_disable(void)
2640 {
2641 	int cpu;
2642 
2643 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2644 
2645 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2646 		return;
2647 
2648 	if (--trace_buffered_event_ref)
2649 		return;
2650 
2651 	/* For each CPU, set the buffer as used. */
2652 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2653 			 NULL, true);
2654 
2655 	/* Wait for all current users to finish */
2656 	synchronize_rcu();
2657 
2658 	for_each_tracing_cpu(cpu) {
2659 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2660 		per_cpu(trace_buffered_event, cpu) = NULL;
2661 	}
2662 
2663 	/*
2664 	 * Wait for all CPUs that potentially started checking if they can use
2665 	 * their event buffer only after the previous synchronize_rcu() call and
2666 	 * they still read a valid pointer from trace_buffered_event. It must be
2667 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2668 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2669 	 */
2670 	synchronize_rcu();
2671 
2672 	/* For each CPU, relinquish the buffer */
2673 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2674 			 true);
2675 }
2676 
2677 static struct trace_buffer *temp_buffer;
2678 
2679 struct ring_buffer_event *
2680 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2681 			  struct trace_event_file *trace_file,
2682 			  int type, unsigned long len,
2683 			  unsigned int trace_ctx)
2684 {
2685 	struct ring_buffer_event *entry;
2686 	struct trace_array *tr = trace_file->tr;
2687 	int val;
2688 
2689 	*current_rb = tr->array_buffer.buffer;
2690 
2691 	if (!tr->no_filter_buffering_ref &&
2692 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2693 		preempt_disable_notrace();
2694 		/*
2695 		 * Filtering is on, so try to use the per cpu buffer first.
2696 		 * This buffer will simulate a ring_buffer_event,
2697 		 * where the type_len is zero and the array[0] will
2698 		 * hold the full length.
2699 		 * (see include/linux/ring_buffer.h for details on
2700 		 *  how the ring_buffer_event is structured).
2701 		 *
2702 		 * Using a temp buffer during filtering and copying it
2703 		 * on a matched filter is quicker than writing directly
2704 		 * into the ring buffer and then discarding it when
2705 		 * it doesn't match. That is because the discard
2706 		 * requires several atomic operations to get right.
2707 		 * Copying on match and doing nothing on a failed match
2708 		 * is still quicker than no copy on match, but having
2709 		 * to discard out of the ring buffer on a failed match.
2710 		 */
2711 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2712 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2713 
2714 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2715 
2716 			/*
2717 			 * Preemption is disabled, but interrupts and NMIs
2718 			 * can still come in now. If that happens after
2719 			 * the above increment, then it will have to go
2720 			 * back to the old method of allocating the event
2721 			 * on the ring buffer, and if the filter fails, it
2722 			 * will have to call ring_buffer_discard_commit()
2723 			 * to remove it.
2724 			 *
2725 			 * Need to also check the unlikely case that the
2726 			 * length is bigger than the temp buffer size.
2727 			 * If that happens, then the reserve is pretty much
2728 			 * guaranteed to fail, as the ring buffer currently
2729 			 * only allows events less than a page. But that may
2730 			 * change in the future, so let the ring buffer reserve
2731 			 * handle the failure in that case.
2732 			 */
2733 			if (val == 1 && likely(len <= max_len)) {
2734 				trace_event_setup(entry, type, trace_ctx);
2735 				entry->array[0] = len;
2736 				/* Return with preemption disabled */
2737 				return entry;
2738 			}
2739 			this_cpu_dec(trace_buffered_event_cnt);
2740 		}
2741 		/* __trace_buffer_lock_reserve() disables preemption */
2742 		preempt_enable_notrace();
2743 	}
2744 
2745 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2746 					    trace_ctx);
2747 	/*
2748 	 * If tracing is off, but we have triggers enabled
2749 	 * we still need to look at the event data. Use the temp_buffer
2750 	 * to store the trace event for the trigger to use. It's recursion
2751 	 * safe and will not be recorded anywhere.
2752 	 */
2753 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2754 		*current_rb = temp_buffer;
2755 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2756 						    trace_ctx);
2757 	}
2758 	return entry;
2759 }
2760 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2761 
2762 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2763 static DEFINE_MUTEX(tracepoint_printk_mutex);
2764 
2765 static void output_printk(struct trace_event_buffer *fbuffer)
2766 {
2767 	struct trace_event_call *event_call;
2768 	struct trace_event_file *file;
2769 	struct trace_event *event;
2770 	unsigned long flags;
2771 	struct trace_iterator *iter = tracepoint_print_iter;
2772 
2773 	/* We should never get here if iter is NULL */
2774 	if (WARN_ON_ONCE(!iter))
2775 		return;
2776 
2777 	event_call = fbuffer->trace_file->event_call;
2778 	if (!event_call || !event_call->event.funcs ||
2779 	    !event_call->event.funcs->trace)
2780 		return;
2781 
2782 	file = fbuffer->trace_file;
2783 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2784 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2785 	     !filter_match_preds(file->filter, fbuffer->entry)))
2786 		return;
2787 
2788 	event = &fbuffer->trace_file->event_call->event;
2789 
2790 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2791 	trace_seq_init(&iter->seq);
2792 	iter->ent = fbuffer->entry;
2793 	event_call->event.funcs->trace(iter, 0, event);
2794 	trace_seq_putc(&iter->seq, 0);
2795 	printk("%s", iter->seq.buffer);
2796 
2797 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2798 }
2799 
2800 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2801 			     void *buffer, size_t *lenp,
2802 			     loff_t *ppos)
2803 {
2804 	int save_tracepoint_printk;
2805 	int ret;
2806 
2807 	guard(mutex)(&tracepoint_printk_mutex);
2808 	save_tracepoint_printk = tracepoint_printk;
2809 
2810 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2811 
2812 	/*
2813 	 * This will force exiting early, as tracepoint_printk
2814 	 * is always zero when tracepoint_print_iter is not allocated.
2815 	 */
2816 	if (!tracepoint_print_iter)
2817 		tracepoint_printk = 0;
2818 
2819 	if (save_tracepoint_printk == tracepoint_printk)
2820 		return ret;
2821 
2822 	if (tracepoint_printk)
2823 		static_key_enable(&tracepoint_printk_key.key);
2824 	else
2825 		static_key_disable(&tracepoint_printk_key.key);
2826 
2827 	return ret;
2828 }
2829 
2830 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2831 {
2832 	enum event_trigger_type tt = ETT_NONE;
2833 	struct trace_event_file *file = fbuffer->trace_file;
2834 
2835 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2836 			fbuffer->entry, &tt))
2837 		goto discard;
2838 
2839 	if (static_key_false(&tracepoint_printk_key.key))
2840 		output_printk(fbuffer);
2841 
2842 	if (static_branch_unlikely(&trace_event_exports_enabled))
2843 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2844 
2845 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2846 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2847 
2848 discard:
2849 	if (tt)
2850 		event_triggers_post_call(file, tt);
2851 
2852 }
2853 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2854 
2855 /*
2856  * Skip 3:
2857  *
2858  *   trace_buffer_unlock_commit_regs()
2859  *   trace_event_buffer_commit()
2860  *   trace_event_raw_event_xxx()
2861  */
2862 # define STACK_SKIP 3
2863 
2864 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2865 				     struct trace_buffer *buffer,
2866 				     struct ring_buffer_event *event,
2867 				     unsigned int trace_ctx,
2868 				     struct pt_regs *regs)
2869 {
2870 	__buffer_unlock_commit(buffer, event);
2871 
2872 	/*
2873 	 * If regs is not set, then skip the necessary functions.
2874 	 * Note, we can still get here via blktrace, wakeup tracer
2875 	 * and mmiotrace, but that's ok if they lose a function or
2876 	 * two. They are not that meaningful.
2877 	 */
2878 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2879 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2880 }
2881 
2882 /*
2883  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2884  */
2885 void
2886 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2887 				   struct ring_buffer_event *event)
2888 {
2889 	__buffer_unlock_commit(buffer, event);
2890 }
2891 
2892 void
2893 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2894 	       parent_ip, unsigned int trace_ctx)
2895 {
2896 	struct trace_event_call *call = &event_function;
2897 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2898 	struct ring_buffer_event *event;
2899 	struct ftrace_entry *entry;
2900 
2901 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2902 					    trace_ctx);
2903 	if (!event)
2904 		return;
2905 	entry	= ring_buffer_event_data(event);
2906 	entry->ip			= ip;
2907 	entry->parent_ip		= parent_ip;
2908 
2909 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2910 		if (static_branch_unlikely(&trace_function_exports_enabled))
2911 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2912 		__buffer_unlock_commit(buffer, event);
2913 	}
2914 }
2915 
2916 #ifdef CONFIG_STACKTRACE
2917 
2918 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2919 #define FTRACE_KSTACK_NESTING	4
2920 
2921 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2922 
2923 struct ftrace_stack {
2924 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2925 };
2926 
2927 
2928 struct ftrace_stacks {
2929 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2930 };
2931 
2932 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2933 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2934 
2935 static void __ftrace_trace_stack(struct trace_array *tr,
2936 				 struct trace_buffer *buffer,
2937 				 unsigned int trace_ctx,
2938 				 int skip, struct pt_regs *regs)
2939 {
2940 	struct trace_event_call *call = &event_kernel_stack;
2941 	struct ring_buffer_event *event;
2942 	unsigned int size, nr_entries;
2943 	struct ftrace_stack *fstack;
2944 	struct stack_entry *entry;
2945 	int stackidx;
2946 
2947 	/*
2948 	 * Add one, for this function and the call to save_stack_trace().
2949 	 * If regs is set, then these functions will not be in the way.
2950 	 */
2951 #ifndef CONFIG_UNWINDER_ORC
2952 	if (!regs)
2953 		skip++;
2954 #endif
2955 
2956 	preempt_disable_notrace();
2957 
2958 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2959 
2960 	/* This should never happen. If it does, yell once and skip */
2961 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2962 		goto out;
2963 
2964 	/*
2965 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2966 	 * interrupt will either see the value pre increment or post
2967 	 * increment. If the interrupt happens pre increment it will have
2968 	 * restored the counter when it returns.  We just need a barrier to
2969 	 * keep gcc from moving things around.
2970 	 */
2971 	barrier();
2972 
2973 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2974 	size = ARRAY_SIZE(fstack->calls);
2975 
2976 	if (regs) {
2977 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2978 						   size, skip);
2979 	} else {
2980 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2981 	}
2982 
2983 #ifdef CONFIG_DYNAMIC_FTRACE
2984 	/* Mark entry of stack trace as trampoline code */
2985 	if (tr->ops && tr->ops->trampoline) {
2986 		unsigned long tramp_start = tr->ops->trampoline;
2987 		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
2988 		unsigned long *calls = fstack->calls;
2989 
2990 		for (int i = 0; i < nr_entries; i++) {
2991 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
2992 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
2993 		}
2994 	}
2995 #endif
2996 
2997 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2998 				    struct_size(entry, caller, nr_entries),
2999 				    trace_ctx);
3000 	if (!event)
3001 		goto out;
3002 	entry = ring_buffer_event_data(event);
3003 
3004 	entry->size = nr_entries;
3005 	memcpy(&entry->caller, fstack->calls,
3006 	       flex_array_size(entry, caller, nr_entries));
3007 
3008 	if (!call_filter_check_discard(call, entry, buffer, event))
3009 		__buffer_unlock_commit(buffer, event);
3010 
3011  out:
3012 	/* Again, don't let gcc optimize things here */
3013 	barrier();
3014 	__this_cpu_dec(ftrace_stack_reserve);
3015 	preempt_enable_notrace();
3016 
3017 }
3018 
3019 static inline void ftrace_trace_stack(struct trace_array *tr,
3020 				      struct trace_buffer *buffer,
3021 				      unsigned int trace_ctx,
3022 				      int skip, struct pt_regs *regs)
3023 {
3024 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3025 		return;
3026 
3027 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3028 }
3029 
3030 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3031 		   int skip)
3032 {
3033 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3034 
3035 	if (rcu_is_watching()) {
3036 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3037 		return;
3038 	}
3039 
3040 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3041 		return;
3042 
3043 	/*
3044 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3045 	 * but if the above rcu_is_watching() failed, then the NMI
3046 	 * triggered someplace critical, and ct_irq_enter() should
3047 	 * not be called from NMI.
3048 	 */
3049 	if (unlikely(in_nmi()))
3050 		return;
3051 
3052 	ct_irq_enter_irqson();
3053 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3054 	ct_irq_exit_irqson();
3055 }
3056 
3057 /**
3058  * trace_dump_stack - record a stack back trace in the trace buffer
3059  * @skip: Number of functions to skip (helper handlers)
3060  */
3061 void trace_dump_stack(int skip)
3062 {
3063 	if (tracing_disabled || tracing_selftest_running)
3064 		return;
3065 
3066 #ifndef CONFIG_UNWINDER_ORC
3067 	/* Skip 1 to skip this function. */
3068 	skip++;
3069 #endif
3070 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3071 				tracing_gen_ctx(), skip, NULL);
3072 }
3073 EXPORT_SYMBOL_GPL(trace_dump_stack);
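/*
 * Illustrative use: a subsystem chasing an unexpected code path can call
 * trace_dump_stack(0) to record the current kernel stack into the trace
 * buffer instead of printing it to the console with dump_stack().
 */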
3074 
3075 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3076 static DEFINE_PER_CPU(int, user_stack_count);
3077 
3078 static void
3079 ftrace_trace_userstack(struct trace_array *tr,
3080 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3081 {
3082 	struct trace_event_call *call = &event_user_stack;
3083 	struct ring_buffer_event *event;
3084 	struct userstack_entry *entry;
3085 
3086 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3087 		return;
3088 
3089 	/*
3090 	 * NMIs cannot handle page faults, even with fixups.
3091 	 * Saving the user stack can (and often does) fault.
3092 	 */
3093 	if (unlikely(in_nmi()))
3094 		return;
3095 
3096 	/*
3097 	 * Prevent recursion, since the user stack tracing may
3098 	 * trigger other kernel events.
3099 	 */
3100 	preempt_disable();
3101 	if (__this_cpu_read(user_stack_count))
3102 		goto out;
3103 
3104 	__this_cpu_inc(user_stack_count);
3105 
3106 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3107 					    sizeof(*entry), trace_ctx);
3108 	if (!event)
3109 		goto out_drop_count;
3110 	entry	= ring_buffer_event_data(event);
3111 
3112 	entry->tgid		= current->tgid;
3113 	memset(&entry->caller, 0, sizeof(entry->caller));
3114 
3115 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3116 	if (!call_filter_check_discard(call, entry, buffer, event))
3117 		__buffer_unlock_commit(buffer, event);
3118 
3119  out_drop_count:
3120 	__this_cpu_dec(user_stack_count);
3121  out:
3122 	preempt_enable();
3123 }
3124 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3125 static void ftrace_trace_userstack(struct trace_array *tr,
3126 				   struct trace_buffer *buffer,
3127 				   unsigned int trace_ctx)
3128 {
3129 }
3130 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3131 
3132 #endif /* CONFIG_STACKTRACE */
3133 
3134 static inline void
3135 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3136 			  unsigned long long delta)
3137 {
3138 	entry->bottom_delta_ts = delta & U32_MAX;
3139 	entry->top_delta_ts = (delta >> 32);
3140 }
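/*
 * The 64-bit delta is split across the two 32-bit fields above; readers
 * reconstruct it as:
 *
 *	delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */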
3141 
3142 void trace_last_func_repeats(struct trace_array *tr,
3143 			     struct trace_func_repeats *last_info,
3144 			     unsigned int trace_ctx)
3145 {
3146 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3147 	struct func_repeats_entry *entry;
3148 	struct ring_buffer_event *event;
3149 	u64 delta;
3150 
3151 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3152 					    sizeof(*entry), trace_ctx);
3153 	if (!event)
3154 		return;
3155 
3156 	delta = ring_buffer_event_time_stamp(buffer, event) -
3157 		last_info->ts_last_call;
3158 
3159 	entry = ring_buffer_event_data(event);
3160 	entry->ip = last_info->ip;
3161 	entry->parent_ip = last_info->parent_ip;
3162 	entry->count = last_info->count;
3163 	func_repeats_set_delta_ts(entry, delta);
3164 
3165 	__buffer_unlock_commit(buffer, event);
3166 }
3167 
3168 /* created for use with alloc_percpu */
3169 struct trace_buffer_struct {
3170 	int nesting;
3171 	char buffer[4][TRACE_BUF_SIZE];
3172 };
3173 
3174 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3175 
3176 /*
3177  * This allows for lockless recording.  If we're nested too deeply, then
3178  * this returns NULL.
3179  */
3180 static char *get_trace_buf(void)
3181 {
3182 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3183 
3184 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3185 		return NULL;
3186 
3187 	buffer->nesting++;
3188 
3189 	/* Interrupts must see nesting incremented before we use the buffer */
3190 	barrier();
3191 	return &buffer->buffer[buffer->nesting - 1][0];
3192 }
3193 
3194 static void put_trace_buf(void)
3195 {
3196 	/* Don't let the decrement of nesting leak before this */
3197 	barrier();
3198 	this_cpu_dec(trace_percpu_buffer->nesting);
3199 }
3200 
3201 static int alloc_percpu_trace_buffer(void)
3202 {
3203 	struct trace_buffer_struct __percpu *buffers;
3204 
3205 	if (trace_percpu_buffer)
3206 		return 0;
3207 
3208 	buffers = alloc_percpu(struct trace_buffer_struct);
3209 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3210 		return -ENOMEM;
3211 
3212 	trace_percpu_buffer = buffers;
3213 	return 0;
3214 }
3215 
3216 static int buffers_allocated;
3217 
3218 void trace_printk_init_buffers(void)
3219 {
3220 	if (buffers_allocated)
3221 		return;
3222 
3223 	if (alloc_percpu_trace_buffer())
3224 		return;
3225 
3226 	/* trace_printk() is for debug use only. Don't use it in production. */
3227 
3228 	pr_warn("\n");
3229 	pr_warn("**********************************************************\n");
3230 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3231 	pr_warn("**                                                      **\n");
3232 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3233 	pr_warn("**                                                      **\n");
3234 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3235 	pr_warn("** unsafe for production use.                           **\n");
3236 	pr_warn("**                                                      **\n");
3237 	pr_warn("** If you see this message and you are not debugging    **\n");
3238 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3239 	pr_warn("**                                                      **\n");
3240 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3241 	pr_warn("**********************************************************\n");
3242 
3243 	/* Expand the buffers to set size */
3244 	tracing_update_buffers(&global_trace);
3245 
3246 	buffers_allocated = 1;
3247 
3248 	/*
3249 	 * trace_printk_init_buffers() can be called by modules.
3250 	 * If that happens, then we need to start cmdline recording
3251 	 * directly here. If the global_trace.buffer is already
3252 	 * allocated here, then this was called by module code.
3253 	 */
3254 	if (global_trace.array_buffer.buffer)
3255 		tracing_start_cmdline_record();
3256 }
3257 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3258 
3259 void trace_printk_start_comm(void)
3260 {
3261 	/* Start tracing comms if trace printk is set */
3262 	if (!buffers_allocated)
3263 		return;
3264 	tracing_start_cmdline_record();
3265 }
3266 
3267 static void trace_printk_start_stop_comm(int enabled)
3268 {
3269 	if (!buffers_allocated)
3270 		return;
3271 
3272 	if (enabled)
3273 		tracing_start_cmdline_record();
3274 	else
3275 		tracing_stop_cmdline_record();
3276 }
3277 
3278 /**
3279  * trace_vbprintk - write binary msg to tracing buffer
3280  * @ip:    The address of the caller
3281  * @fmt:   The string format to write to the buffer
3282  * @args:  Arguments for @fmt
3283  */
3284 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3285 {
3286 	struct trace_event_call *call = &event_bprint;
3287 	struct ring_buffer_event *event;
3288 	struct trace_buffer *buffer;
3289 	struct trace_array *tr = READ_ONCE(printk_trace);
3290 	struct bprint_entry *entry;
3291 	unsigned int trace_ctx;
3292 	char *tbuffer;
3293 	int len = 0, size;
3294 
3295 	if (!printk_binsafe(tr))
3296 		return trace_vprintk(ip, fmt, args);
3297 
3298 	if (unlikely(tracing_selftest_running || tracing_disabled))
3299 		return 0;
3300 
3301 	/* Don't pollute graph traces with trace_vprintk internals */
3302 	pause_graph_tracing();
3303 
3304 	trace_ctx = tracing_gen_ctx();
3305 	preempt_disable_notrace();
3306 
3307 	tbuffer = get_trace_buf();
3308 	if (!tbuffer) {
3309 		len = 0;
3310 		goto out_nobuffer;
3311 	}
3312 
3313 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3314 
3315 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3316 		goto out_put;
3317 
3318 	size = sizeof(*entry) + sizeof(u32) * len;
3319 	buffer = tr->array_buffer.buffer;
3320 	ring_buffer_nest_start(buffer);
3321 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3322 					    trace_ctx);
3323 	if (!event)
3324 		goto out;
3325 	entry = ring_buffer_event_data(event);
3326 	entry->ip			= ip;
3327 	entry->fmt			= fmt;
3328 
3329 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3330 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3331 		__buffer_unlock_commit(buffer, event);
3332 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3333 	}
3334 
3335 out:
3336 	ring_buffer_nest_end(buffer);
3337 out_put:
3338 	put_trace_buf();
3339 
3340 out_nobuffer:
3341 	preempt_enable_notrace();
3342 	unpause_graph_tracing();
3343 
3344 	return len;
3345 }
3346 EXPORT_SYMBOL_GPL(trace_vbprintk);
3347 
3348 static __printf(3, 0)
3349 int __trace_array_vprintk(struct trace_buffer *buffer,
3350 			  unsigned long ip, const char *fmt, va_list args)
3351 {
3352 	struct trace_event_call *call = &event_print;
3353 	struct ring_buffer_event *event;
3354 	int len = 0, size;
3355 	struct print_entry *entry;
3356 	unsigned int trace_ctx;
3357 	char *tbuffer;
3358 
3359 	if (tracing_disabled)
3360 		return 0;
3361 
3362 	/* Don't pollute graph traces with trace_vprintk internals */
3363 	pause_graph_tracing();
3364 
3365 	trace_ctx = tracing_gen_ctx();
3366 	preempt_disable_notrace();
3367 
3368 
3369 	tbuffer = get_trace_buf();
3370 	if (!tbuffer) {
3371 		len = 0;
3372 		goto out_nobuffer;
3373 	}
3374 
3375 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3376 
3377 	size = sizeof(*entry) + len + 1;
3378 	ring_buffer_nest_start(buffer);
3379 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3380 					    trace_ctx);
3381 	if (!event)
3382 		goto out;
3383 	entry = ring_buffer_event_data(event);
3384 	entry->ip = ip;
3385 
3386 	memcpy(&entry->buf, tbuffer, len + 1);
3387 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3388 		__buffer_unlock_commit(buffer, event);
3389 		ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3390 	}
3391 
3392 out:
3393 	ring_buffer_nest_end(buffer);
3394 	put_trace_buf();
3395 
3396 out_nobuffer:
3397 	preempt_enable_notrace();
3398 	unpause_graph_tracing();
3399 
3400 	return len;
3401 }
3402 
3403 int trace_array_vprintk(struct trace_array *tr,
3404 			unsigned long ip, const char *fmt, va_list args)
3405 {
3406 	if (tracing_selftest_running && tr == &global_trace)
3407 		return 0;
3408 
3409 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3410 }
3411 
3412 /**
3413  * trace_array_printk - Print a message to a specific instance
3414  * @tr: The instance trace_array descriptor
3415  * @ip: The instruction pointer that this is called from.
3416  * @fmt: The format to print (printf format)
3417  *
3418  * If a subsystem sets up its own instance, they have the right to
3419  * printk strings into their tracing instance buffer using this
3420  * function. Note, this function will not write into the top level
3421  * buffer (use trace_printk() for that), as writing into the top level
3422  * buffer should only have events that can be individually disabled.
3423  * trace_printk() is only used for debugging a kernel, and should never
3424  * be incorporated into normal use.
3425  *
3426  * trace_array_printk() can be used, as it will not add noise to the
3427  * top level tracing buffer.
3428  *
3429  * Note, trace_array_init_printk() must be called on @tr before this
3430  * can be used.
3431  */
3432 int trace_array_printk(struct trace_array *tr,
3433 		       unsigned long ip, const char *fmt, ...)
3434 {
3435 	int ret;
3436 	va_list ap;
3437 
3438 	if (!tr)
3439 		return -ENOENT;
3440 
3441 	/* This is only allowed for created instances */
3442 	if (tr == &global_trace)
3443 		return 0;
3444 
3445 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3446 		return 0;
3447 
3448 	va_start(ap, fmt);
3449 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3450 	va_end(ap);
3451 	return ret;
3452 }
3453 EXPORT_SYMBOL_GPL(trace_array_printk);
3454 
3455 /**
3456  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3457  * @tr: The trace array to initialize the buffers for
3458  *
3459  * As trace_array_printk() only writes into instances, they are OK to
3460  * have in the kernel (unlike trace_printk()). This needs to be called
3461  * before trace_array_printk() can be used on a trace_array.
3462  */
3463 int trace_array_init_printk(struct trace_array *tr)
3464 {
3465 	if (!tr)
3466 		return -ENOENT;
3467 
3468 	/* This is only allowed for created instances */
3469 	if (tr == &global_trace)
3470 		return -EINVAL;
3471 
3472 	return alloc_percpu_trace_buffer();
3473 }
3474 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3475 
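/*
 * Editorial example (not part of the original file): a rough usage sketch
 * for the two helpers above. The instance name "my_subsys" and the message
 * are hypothetical; trace_array_get_by_name() is assumed to take the
 * (name, systems) form used by current kernels, and the instance must have
 * its "printk" trace option set (TRACE_ITER_PRINTK) or trace_array_printk()
 * silently returns 0:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_subsys", NULL);
 *	if (!tr || trace_array_init_printk(tr))
 *		return -ENODEV;
 *
 *	trace_array_printk(tr, _THIS_IP_, "probe fired: cpu=%d\n",
 *			   smp_processor_id());
 */
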
3476 int trace_array_printk_buf(struct trace_buffer *buffer,
3477 			   unsigned long ip, const char *fmt, ...)
3478 {
3479 	int ret;
3480 	va_list ap;
3481 
3482 	if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3483 		return 0;
3484 
3485 	va_start(ap, fmt);
3486 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3487 	va_end(ap);
3488 	return ret;
3489 }
3490 
3491 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3492 {
3493 	return trace_array_vprintk(printk_trace, ip, fmt, args);
3494 }
3495 EXPORT_SYMBOL_GPL(trace_vprintk);
3496 
3497 static void trace_iterator_increment(struct trace_iterator *iter)
3498 {
3499 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3500 
3501 	iter->idx++;
3502 	if (buf_iter)
3503 		ring_buffer_iter_advance(buf_iter);
3504 }
3505 
3506 static struct trace_entry *
3507 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3508 		unsigned long *lost_events)
3509 {
3510 	struct ring_buffer_event *event;
3511 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3512 
3513 	if (buf_iter) {
3514 		event = ring_buffer_iter_peek(buf_iter, ts);
3515 		if (lost_events)
3516 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3517 				(unsigned long)-1 : 0;
3518 	} else {
3519 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3520 					 lost_events);
3521 	}
3522 
3523 	if (event) {
3524 		iter->ent_size = ring_buffer_event_length(event);
3525 		return ring_buffer_event_data(event);
3526 	}
3527 	iter->ent_size = 0;
3528 	return NULL;
3529 }
3530 
3531 static struct trace_entry *
3532 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3533 		  unsigned long *missing_events, u64 *ent_ts)
3534 {
3535 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3536 	struct trace_entry *ent, *next = NULL;
3537 	unsigned long lost_events = 0, next_lost = 0;
3538 	int cpu_file = iter->cpu_file;
3539 	u64 next_ts = 0, ts;
3540 	int next_cpu = -1;
3541 	int next_size = 0;
3542 	int cpu;
3543 
3544 	/*
3545 	 * If we are in a per_cpu trace file, don't bother iterating over
3546 	 * all CPUs; just peek at that CPU directly.
3547 	 */
3548 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3549 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3550 			return NULL;
3551 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3552 		if (ent_cpu)
3553 			*ent_cpu = cpu_file;
3554 
3555 		return ent;
3556 	}
3557 
3558 	for_each_tracing_cpu(cpu) {
3559 
3560 		if (ring_buffer_empty_cpu(buffer, cpu))
3561 			continue;
3562 
3563 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3564 
3565 		/*
3566 		 * Pick the entry with the smallest timestamp:
3567 		 */
3568 		if (ent && (!next || ts < next_ts)) {
3569 			next = ent;
3570 			next_cpu = cpu;
3571 			next_ts = ts;
3572 			next_lost = lost_events;
3573 			next_size = iter->ent_size;
3574 		}
3575 	}
3576 
3577 	iter->ent_size = next_size;
3578 
3579 	if (ent_cpu)
3580 		*ent_cpu = next_cpu;
3581 
3582 	if (ent_ts)
3583 		*ent_ts = next_ts;
3584 
3585 	if (missing_events)
3586 		*missing_events = next_lost;
3587 
3588 	return next;
3589 }
3590 
3591 #define STATIC_FMT_BUF_SIZE	128
3592 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3593 
3594 char *trace_iter_expand_format(struct trace_iterator *iter)
3595 {
3596 	char *tmp;
3597 
3598 	/*
3599 	 * iter->tr is NULL when used with tp_printk, which makes
3600 	 * this get called where it is not safe to call krealloc().
3601 	 */
3602 	if (!iter->tr || iter->fmt == static_fmt_buf)
3603 		return NULL;
3604 
3605 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3606 		       GFP_KERNEL);
3607 	if (tmp) {
3608 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3609 		iter->fmt = tmp;
3610 	}
3611 
3612 	return tmp;
3613 }
3614 
3615 /* Returns true if the string is safe to dereference from an event */
3616 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3617 {
3618 	unsigned long addr = (unsigned long)str;
3619 	struct trace_event *trace_event;
3620 	struct trace_event_call *event;
3621 
3622 	/* OK if part of the event data */
3623 	if ((addr >= (unsigned long)iter->ent) &&
3624 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3625 		return true;
3626 
3627 	/* OK if part of the temp seq buffer */
3628 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3629 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3630 		return true;
3631 
3632 	/* Core rodata can not be freed */
3633 	if (is_kernel_rodata(addr))
3634 		return true;
3635 
3636 	if (trace_is_tracepoint_string(str))
3637 		return true;
3638 
3639 	/*
3640 	 * Now this could be a module event, referencing core module
3641 	 * data, which is OK.
3642 	 */
3643 	if (!iter->ent)
3644 		return false;
3645 
3646 	trace_event = ftrace_find_event(iter->ent->type);
3647 	if (!trace_event)
3648 		return false;
3649 
3650 	event = container_of(trace_event, struct trace_event_call, event);
3651 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3652 		return false;
3653 
3654 	/* Would rather have rodata, but this will suffice */
3655 	if (within_module_core(addr, event->module))
3656 		return true;
3657 
3658 	return false;
3659 }
3660 
3661 /**
3662  * ignore_event - Check dereferenced fields while writing to the seq buffer
3663  * @iter: The iterator that holds the seq buffer and the event being printed
3664  *
3665  * At boot up, test_event_printk() will flag any event that dereferences
3666  * a string with "%s" that does not exist in the ring buffer. It may still
3667  * be valid, as the string may point to a static string in the kernel
3668  * rodata that never gets freed. But if the string pointer is pointing
3669  * to something that was allocated, there's a chance that it can be freed
3670  * by the time the user reads the trace. This would cause a bad memory
3671  * access by the kernel and possibly crash the system.
3672  *
3673  * This function will check if the event has any fields flagged as needing
3674  * to be checked at runtime and perform those checks.
3675  *
3676  * If it is found that a field is unsafe, it will write into the @iter->seq
3677  * a message stating what was found to be unsafe.
3678  *
3679  * @return: true if the event is unsafe and should be ignored,
3680  *          false otherwise.
3681  */
3682 bool ignore_event(struct trace_iterator *iter)
3683 {
3684 	struct ftrace_event_field *field;
3685 	struct trace_event *trace_event;
3686 	struct trace_event_call *event;
3687 	struct list_head *head;
3688 	struct trace_seq *seq;
3689 	const void *ptr;
3690 
3691 	trace_event = ftrace_find_event(iter->ent->type);
3692 
3693 	seq = &iter->seq;
3694 
3695 	if (!trace_event) {
3696 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3697 		return true;
3698 	}
3699 
3700 	event = container_of(trace_event, struct trace_event_call, event);
3701 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3702 		return false;
3703 
3704 	head = trace_get_fields(event);
3705 	if (!head) {
3706 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3707 				 trace_event_name(event));
3708 		return true;
3709 	}
3710 
3711 	/* Offsets are from the iter->ent that points to the raw event */
3712 	ptr = iter->ent;
3713 
3714 	list_for_each_entry(field, head, link) {
3715 		const char *str;
3716 		bool good;
3717 
3718 		if (!field->needs_test)
3719 			continue;
3720 
3721 		str = *(const char **)(ptr + field->offset);
3722 
3723 		good = trace_safe_str(iter, str);
3724 
3725 		/*
3726 		 * If you hit this warning, it is likely that the
3727 		 * trace event in question used %s on a string that
3728 		 * was saved at the time of the event, but may not be
3729 		 * around when the trace is read. Use __string(),
3730 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3731 		 * instead. See samples/trace_events/trace-events-sample.h
3732 		 * for reference.
3733 		 */
3734 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3735 			      trace_event_name(event), field->name)) {
3736 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3737 					 trace_event_name(event), field->name);
3738 			return true;
3739 		}
3740 	}
3741 	return false;
3742 }
3743 
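/*
 * Editorial example (not from the original file): the safe pattern that the
 * warning in ignore_event() points to. Rather than recording a bare
 * "const char *" and printing it with "%s", copy the string into the event
 * itself. "sample_event" is a made-up event name; see
 * samples/trace_events/trace-events-sample.h for the real reference.
 *
 *	TRACE_EVENT(sample_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 *
 * (__assign_str() also took a source argument on older kernels.)
 */
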
3744 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3745 {
3746 	const char *p, *new_fmt;
3747 	char *q;
3748 
3749 	if (WARN_ON_ONCE(!fmt))
3750 		return fmt;
3751 
3752 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3753 		return fmt;
3754 
3755 	p = fmt;
3756 	new_fmt = q = iter->fmt;
3757 	while (*p) {
3758 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3759 			if (!trace_iter_expand_format(iter))
3760 				return fmt;
3761 
3762 			q += iter->fmt - new_fmt;
3763 			new_fmt = iter->fmt;
3764 		}
3765 
3766 		*q++ = *p++;
3767 
3768 		/* Replace %p with %px */
3769 		if (p[-1] == '%') {
3770 			if (p[0] == '%') {
3771 				*q++ = *p++;
3772 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3773 				*q++ = *p++;
3774 				*q++ = 'x';
3775 			}
3776 		}
3777 	}
3778 	*q = '\0';
3779 
3780 	return new_fmt;
3781 }
3782 
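/*
 * Editorial note: with the "hash-ptr" option cleared, trace_event_format()
 * above rewrites, for example,
 *
 *	"comm=%s ptr=%p pct=100%%"
 * into
 *	"comm=%s ptr=%px pct=100%%"
 *
 * so the real pointer value is printed instead of a hashed one, while "%%"
 * escapes and other conversions are copied through unchanged.
 */
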
3783 #define STATIC_TEMP_BUF_SIZE	128
3784 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3785 
3786 /* Find the next real entry, without updating the iterator itself */
3787 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3788 					  int *ent_cpu, u64 *ent_ts)
3789 {
3790 	/* __find_next_entry will reset ent_size */
3791 	int ent_size = iter->ent_size;
3792 	struct trace_entry *entry;
3793 
3794 	/*
3795 	 * If called from ftrace_dump(), then the iter->temp buffer
3796 	 * will be the static_temp_buf and not created from kmalloc.
3797 	 * If the entry size is greater than the buffer, we can
3798 	 * not save it. Just return NULL in that case. This is only
3799 	 * used to add markers when two consecutive events' time
3800 	 * stamps have a large delta. See trace_print_lat_context()
3801 	 * stamps have a large delta. See trace_print_lat_context().
3802 	if (iter->temp == static_temp_buf &&
3803 	    STATIC_TEMP_BUF_SIZE < ent_size)
3804 		return NULL;
3805 
3806 	/*
3807 	 * The __find_next_entry() may call peek_next_entry(), which may
3808 	 * call ring_buffer_peek() that may make the contents of iter->ent
3809 	 * undefined. Need to copy iter->ent now.
3810 	 */
3811 	if (iter->ent && iter->ent != iter->temp) {
3812 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3813 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3814 			void *temp;
3815 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3816 			if (!temp)
3817 				return NULL;
3818 			kfree(iter->temp);
3819 			iter->temp = temp;
3820 			iter->temp_size = iter->ent_size;
3821 		}
3822 		memcpy(iter->temp, iter->ent, iter->ent_size);
3823 		iter->ent = iter->temp;
3824 	}
3825 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3826 	/* Put back the original ent_size */
3827 	iter->ent_size = ent_size;
3828 
3829 	return entry;
3830 }
3831 
3832 /* Find the next real entry, and increment the iterator to the next entry */
3833 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3834 {
3835 	iter->ent = __find_next_entry(iter, &iter->cpu,
3836 				      &iter->lost_events, &iter->ts);
3837 
3838 	if (iter->ent)
3839 		trace_iterator_increment(iter);
3840 
3841 	return iter->ent ? iter : NULL;
3842 }
3843 
3844 static void trace_consume(struct trace_iterator *iter)
3845 {
3846 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3847 			    &iter->lost_events);
3848 }
3849 
3850 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3851 {
3852 	struct trace_iterator *iter = m->private;
3853 	int i = (int)*pos;
3854 	void *ent;
3855 
3856 	WARN_ON_ONCE(iter->leftover);
3857 
3858 	(*pos)++;
3859 
3860 	/* can't go backwards */
3861 	if (iter->idx > i)
3862 		return NULL;
3863 
3864 	if (iter->idx < 0)
3865 		ent = trace_find_next_entry_inc(iter);
3866 	else
3867 		ent = iter;
3868 
3869 	while (ent && iter->idx < i)
3870 		ent = trace_find_next_entry_inc(iter);
3871 
3872 	iter->pos = *pos;
3873 
3874 	return ent;
3875 }
3876 
3877 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3878 {
3879 	struct ring_buffer_iter *buf_iter;
3880 	unsigned long entries = 0;
3881 	u64 ts;
3882 
3883 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3884 
3885 	buf_iter = trace_buffer_iter(iter, cpu);
3886 	if (!buf_iter)
3887 		return;
3888 
3889 	ring_buffer_iter_reset(buf_iter);
3890 
3891 	/*
3892 	 * We could have the case with the max latency tracers
3893 	 * that a reset never took place on a cpu. This is evident
3894 	 * by the timestamp being before the start of the buffer.
3895 	 */
3896 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3897 		if (ts >= iter->array_buffer->time_start)
3898 			break;
3899 		entries++;
3900 		ring_buffer_iter_advance(buf_iter);
3901 		/* This could be a big loop */
3902 		cond_resched();
3903 	}
3904 
3905 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3906 }
3907 
3908 /*
3909  * The current tracer is copied to avoid taking a global lock
3910  * all around.
3911  */
3912 static void *s_start(struct seq_file *m, loff_t *pos)
3913 {
3914 	struct trace_iterator *iter = m->private;
3915 	struct trace_array *tr = iter->tr;
3916 	int cpu_file = iter->cpu_file;
3917 	void *p = NULL;
3918 	loff_t l = 0;
3919 	int cpu;
3920 
3921 	mutex_lock(&trace_types_lock);
3922 	if (unlikely(tr->current_trace != iter->trace)) {
3923 		/* Close iter->trace before switching to the new current tracer */
3924 		if (iter->trace->close)
3925 			iter->trace->close(iter);
3926 		iter->trace = tr->current_trace;
3927 		/* Reopen the new current tracer */
3928 		if (iter->trace->open)
3929 			iter->trace->open(iter);
3930 	}
3931 	mutex_unlock(&trace_types_lock);
3932 
3933 #ifdef CONFIG_TRACER_MAX_TRACE
3934 	if (iter->snapshot && iter->trace->use_max_tr)
3935 		return ERR_PTR(-EBUSY);
3936 #endif
3937 
3938 	if (*pos != iter->pos) {
3939 		iter->ent = NULL;
3940 		iter->cpu = 0;
3941 		iter->idx = -1;
3942 
3943 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3944 			for_each_tracing_cpu(cpu)
3945 				tracing_iter_reset(iter, cpu);
3946 		} else
3947 			tracing_iter_reset(iter, cpu_file);
3948 
3949 		iter->leftover = 0;
3950 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3951 			;
3952 
3953 	} else {
3954 		/*
3955 		 * If we overflowed the seq_file before, then we want
3956 		 * to just reuse the trace_seq buffer again.
3957 		 */
3958 		if (iter->leftover)
3959 			p = iter;
3960 		else {
3961 			l = *pos - 1;
3962 			p = s_next(m, p, &l);
3963 		}
3964 	}
3965 
3966 	trace_event_read_lock();
3967 	trace_access_lock(cpu_file);
3968 	return p;
3969 }
3970 
3971 static void s_stop(struct seq_file *m, void *p)
3972 {
3973 	struct trace_iterator *iter = m->private;
3974 
3975 #ifdef CONFIG_TRACER_MAX_TRACE
3976 	if (iter->snapshot && iter->trace->use_max_tr)
3977 		return;
3978 #endif
3979 
3980 	trace_access_unlock(iter->cpu_file);
3981 	trace_event_read_unlock();
3982 }
3983 
3984 static void
3985 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3986 		      unsigned long *entries, int cpu)
3987 {
3988 	unsigned long count;
3989 
3990 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3991 	/*
3992 	 * If this buffer has skipped entries, then we hold all
3993 	 * entries for the trace and we need to ignore the
3994 	 * ones before the time stamp.
3995 	 */
3996 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3997 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3998 		/* total is the same as the entries */
3999 		*total = count;
4000 	} else
4001 		*total = count +
4002 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4003 	*entries = count;
4004 }
4005 
4006 static void
4007 get_total_entries(struct array_buffer *buf,
4008 		  unsigned long *total, unsigned long *entries)
4009 {
4010 	unsigned long t, e;
4011 	int cpu;
4012 
4013 	*total = 0;
4014 	*entries = 0;
4015 
4016 	for_each_tracing_cpu(cpu) {
4017 		get_total_entries_cpu(buf, &t, &e, cpu);
4018 		*total += t;
4019 		*entries += e;
4020 	}
4021 }
4022 
4023 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4024 {
4025 	unsigned long total, entries;
4026 
4027 	if (!tr)
4028 		tr = &global_trace;
4029 
4030 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4031 
4032 	return entries;
4033 }
4034 
4035 unsigned long trace_total_entries(struct trace_array *tr)
4036 {
4037 	unsigned long total, entries;
4038 
4039 	if (!tr)
4040 		tr = &global_trace;
4041 
4042 	get_total_entries(&tr->array_buffer, &total, &entries);
4043 
4044 	return entries;
4045 }
4046 
4047 static void print_lat_help_header(struct seq_file *m)
4048 {
4049 	seq_puts(m, "#                    _------=> CPU#            \n"
4050 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4051 		    "#                  | / _----=> need-resched    \n"
4052 		    "#                  || / _---=> hardirq/softirq \n"
4053 		    "#                  ||| / _--=> preempt-depth   \n"
4054 		    "#                  |||| / _-=> migrate-disable \n"
4055 		    "#                  ||||| /     delay           \n"
4056 		    "#  cmd     pid     |||||| time  |   caller     \n"
4057 		    "#     \\   /        ||||||  \\    |    /       \n");
4058 }
4059 
4060 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4061 {
4062 	unsigned long total;
4063 	unsigned long entries;
4064 
4065 	get_total_entries(buf, &total, &entries);
4066 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4067 		   entries, total, num_online_cpus());
4068 	seq_puts(m, "#\n");
4069 }
4070 
4071 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4072 				   unsigned int flags)
4073 {
4074 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4075 
4076 	print_event_info(buf, m);
4077 
4078 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4079 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4080 }
4081 
4082 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4083 				       unsigned int flags)
4084 {
4085 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4086 	static const char space[] = "            ";
4087 	int prec = tgid ? 12 : 2;
4088 
4089 	print_event_info(buf, m);
4090 
4091 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4092 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4093 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4094 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4095 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4096 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4097 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4098 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4099 }
4100 
4101 void
4102 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4103 {
4104 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4105 	struct array_buffer *buf = iter->array_buffer;
4106 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4107 	struct tracer *type = iter->trace;
4108 	unsigned long entries;
4109 	unsigned long total;
4110 	const char *name = type->name;
4111 
4112 	get_total_entries(buf, &total, &entries);
4113 
4114 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4115 		   name, init_utsname()->release);
4116 	seq_puts(m, "# -----------------------------------"
4117 		 "---------------------------------\n");
4118 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4119 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4120 		   nsecs_to_usecs(data->saved_latency),
4121 		   entries,
4122 		   total,
4123 		   buf->cpu,
4124 		   preempt_model_none()      ? "server" :
4125 		   preempt_model_voluntary() ? "desktop" :
4126 		   preempt_model_full()      ? "preempt" :
4127 		   preempt_model_rt()        ? "preempt_rt" :
4128 		   "unknown",
4129 		   /* These are reserved for later use */
4130 		   0, 0, 0, 0);
4131 #ifdef CONFIG_SMP
4132 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4133 #else
4134 	seq_puts(m, ")\n");
4135 #endif
4136 	seq_puts(m, "#    -----------------\n");
4137 	seq_printf(m, "#    | task: %.16s-%d "
4138 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4139 		   data->comm, data->pid,
4140 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4141 		   data->policy, data->rt_priority);
4142 	seq_puts(m, "#    -----------------\n");
4143 
4144 	if (data->critical_start) {
4145 		seq_puts(m, "#  => started at: ");
4146 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4147 		trace_print_seq(m, &iter->seq);
4148 		seq_puts(m, "\n#  => ended at:   ");
4149 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4150 		trace_print_seq(m, &iter->seq);
4151 		seq_puts(m, "\n#\n");
4152 	}
4153 
4154 	seq_puts(m, "#\n");
4155 }
4156 
4157 static void test_cpu_buff_start(struct trace_iterator *iter)
4158 {
4159 	struct trace_seq *s = &iter->seq;
4160 	struct trace_array *tr = iter->tr;
4161 
4162 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4163 		return;
4164 
4165 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4166 		return;
4167 
4168 	if (cpumask_available(iter->started) &&
4169 	    cpumask_test_cpu(iter->cpu, iter->started))
4170 		return;
4171 
4172 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4173 		return;
4174 
4175 	if (cpumask_available(iter->started))
4176 		cpumask_set_cpu(iter->cpu, iter->started);
4177 
4178 	/* Don't print started cpu buffer for the first entry of the trace */
4179 	if (iter->idx > 1)
4180 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4181 				iter->cpu);
4182 }
4183 
4184 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4185 {
4186 	struct trace_array *tr = iter->tr;
4187 	struct trace_seq *s = &iter->seq;
4188 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4189 	struct trace_entry *entry;
4190 	struct trace_event *event;
4191 
4192 	entry = iter->ent;
4193 
4194 	test_cpu_buff_start(iter);
4195 
4196 	event = ftrace_find_event(entry->type);
4197 
4198 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4199 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4200 			trace_print_lat_context(iter);
4201 		else
4202 			trace_print_context(iter);
4203 	}
4204 
4205 	if (trace_seq_has_overflowed(s))
4206 		return TRACE_TYPE_PARTIAL_LINE;
4207 
4208 	if (event) {
4209 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4210 			return print_event_fields(iter, event);
4211 		/*
4212 		 * For TRACE_EVENT() events, the print_fmt is not
4213 		 * safe to use if the array has delta offsets.
4214 		 * Force printing via the fields.
4215 		 */
4216 		if ((tr->text_delta || tr->data_delta) &&
4217 		    event->type > __TRACE_LAST_TYPE)
4218 			return print_event_fields(iter, event);
4219 
4220 		return event->funcs->trace(iter, sym_flags, event);
4221 	}
4222 
4223 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4224 
4225 	return trace_handle_return(s);
4226 }
4227 
4228 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4229 {
4230 	struct trace_array *tr = iter->tr;
4231 	struct trace_seq *s = &iter->seq;
4232 	struct trace_entry *entry;
4233 	struct trace_event *event;
4234 
4235 	entry = iter->ent;
4236 
4237 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4238 		trace_seq_printf(s, "%d %d %llu ",
4239 				 entry->pid, iter->cpu, iter->ts);
4240 
4241 	if (trace_seq_has_overflowed(s))
4242 		return TRACE_TYPE_PARTIAL_LINE;
4243 
4244 	event = ftrace_find_event(entry->type);
4245 	if (event)
4246 		return event->funcs->raw(iter, 0, event);
4247 
4248 	trace_seq_printf(s, "%d ?\n", entry->type);
4249 
4250 	return trace_handle_return(s);
4251 }
4252 
4253 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4254 {
4255 	struct trace_array *tr = iter->tr;
4256 	struct trace_seq *s = &iter->seq;
4257 	unsigned char newline = '\n';
4258 	struct trace_entry *entry;
4259 	struct trace_event *event;
4260 
4261 	entry = iter->ent;
4262 
4263 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4264 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4265 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4266 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4267 		if (trace_seq_has_overflowed(s))
4268 			return TRACE_TYPE_PARTIAL_LINE;
4269 	}
4270 
4271 	event = ftrace_find_event(entry->type);
4272 	if (event) {
4273 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4274 		if (ret != TRACE_TYPE_HANDLED)
4275 			return ret;
4276 	}
4277 
4278 	SEQ_PUT_FIELD(s, newline);
4279 
4280 	return trace_handle_return(s);
4281 }
4282 
4283 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4284 {
4285 	struct trace_array *tr = iter->tr;
4286 	struct trace_seq *s = &iter->seq;
4287 	struct trace_entry *entry;
4288 	struct trace_event *event;
4289 
4290 	entry = iter->ent;
4291 
4292 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4293 		SEQ_PUT_FIELD(s, entry->pid);
4294 		SEQ_PUT_FIELD(s, iter->cpu);
4295 		SEQ_PUT_FIELD(s, iter->ts);
4296 		if (trace_seq_has_overflowed(s))
4297 			return TRACE_TYPE_PARTIAL_LINE;
4298 	}
4299 
4300 	event = ftrace_find_event(entry->type);
4301 	return event ? event->funcs->binary(iter, 0, event) :
4302 		TRACE_TYPE_HANDLED;
4303 }
4304 
4305 int trace_empty(struct trace_iterator *iter)
4306 {
4307 	struct ring_buffer_iter *buf_iter;
4308 	int cpu;
4309 
4310 	/* If we are looking at one CPU buffer, only check that one */
4311 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4312 		cpu = iter->cpu_file;
4313 		buf_iter = trace_buffer_iter(iter, cpu);
4314 		if (buf_iter) {
4315 			if (!ring_buffer_iter_empty(buf_iter))
4316 				return 0;
4317 		} else {
4318 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4319 				return 0;
4320 		}
4321 		return 1;
4322 	}
4323 
4324 	for_each_tracing_cpu(cpu) {
4325 		buf_iter = trace_buffer_iter(iter, cpu);
4326 		if (buf_iter) {
4327 			if (!ring_buffer_iter_empty(buf_iter))
4328 				return 0;
4329 		} else {
4330 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4331 				return 0;
4332 		}
4333 	}
4334 
4335 	return 1;
4336 }
4337 
4338 /*  Called with trace_event_read_lock() held. */
4339 enum print_line_t print_trace_line(struct trace_iterator *iter)
4340 {
4341 	struct trace_array *tr = iter->tr;
4342 	unsigned long trace_flags = tr->trace_flags;
4343 	enum print_line_t ret;
4344 
4345 	if (iter->lost_events) {
4346 		if (iter->lost_events == (unsigned long)-1)
4347 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4348 					 iter->cpu);
4349 		else
4350 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4351 					 iter->cpu, iter->lost_events);
4352 		if (trace_seq_has_overflowed(&iter->seq))
4353 			return TRACE_TYPE_PARTIAL_LINE;
4354 	}
4355 
4356 	if (iter->trace && iter->trace->print_line) {
4357 		ret = iter->trace->print_line(iter);
4358 		if (ret != TRACE_TYPE_UNHANDLED)
4359 			return ret;
4360 	}
4361 
4362 	if (iter->ent->type == TRACE_BPUTS &&
4363 			trace_flags & TRACE_ITER_PRINTK &&
4364 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4365 		return trace_print_bputs_msg_only(iter);
4366 
4367 	if (iter->ent->type == TRACE_BPRINT &&
4368 			trace_flags & TRACE_ITER_PRINTK &&
4369 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4370 		return trace_print_bprintk_msg_only(iter);
4371 
4372 	if (iter->ent->type == TRACE_PRINT &&
4373 			trace_flags & TRACE_ITER_PRINTK &&
4374 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4375 		return trace_print_printk_msg_only(iter);
4376 
4377 	if (trace_flags & TRACE_ITER_BIN)
4378 		return print_bin_fmt(iter);
4379 
4380 	if (trace_flags & TRACE_ITER_HEX)
4381 		return print_hex_fmt(iter);
4382 
4383 	if (trace_flags & TRACE_ITER_RAW)
4384 		return print_raw_fmt(iter);
4385 
4386 	return print_trace_fmt(iter);
4387 }
4388 
4389 void trace_latency_header(struct seq_file *m)
4390 {
4391 	struct trace_iterator *iter = m->private;
4392 	struct trace_array *tr = iter->tr;
4393 
4394 	/* print nothing if the buffers are empty */
4395 	if (trace_empty(iter))
4396 		return;
4397 
4398 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4399 		print_trace_header(m, iter);
4400 
4401 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4402 		print_lat_help_header(m);
4403 }
4404 
4405 void trace_default_header(struct seq_file *m)
4406 {
4407 	struct trace_iterator *iter = m->private;
4408 	struct trace_array *tr = iter->tr;
4409 	unsigned long trace_flags = tr->trace_flags;
4410 
4411 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4412 		return;
4413 
4414 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4415 		/* print nothing if the buffers are empty */
4416 		if (trace_empty(iter))
4417 			return;
4418 		print_trace_header(m, iter);
4419 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4420 			print_lat_help_header(m);
4421 	} else {
4422 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4423 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4424 				print_func_help_header_irq(iter->array_buffer,
4425 							   m, trace_flags);
4426 			else
4427 				print_func_help_header(iter->array_buffer, m,
4428 						       trace_flags);
4429 		}
4430 	}
4431 }
4432 
4433 static void test_ftrace_alive(struct seq_file *m)
4434 {
4435 	if (!ftrace_is_dead())
4436 		return;
4437 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4438 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4439 }
4440 
4441 #ifdef CONFIG_TRACER_MAX_TRACE
4442 static void show_snapshot_main_help(struct seq_file *m)
4443 {
4444 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4445 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4446 		    "#                      Takes a snapshot of the main buffer.\n"
4447 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4448 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4449 		    "#                       is not a '0' or '1')\n");
4450 }
4451 
4452 static void show_snapshot_percpu_help(struct seq_file *m)
4453 {
4454 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4455 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4456 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4457 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4458 #else
4459 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4460 		    "#                     Must use main snapshot file to allocate.\n");
4461 #endif
4462 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4463 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4464 		    "#                       is not a '0' or '1')\n");
4465 }
4466 
4467 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4468 {
4469 	if (iter->tr->allocated_snapshot)
4470 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4471 	else
4472 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4473 
4474 	seq_puts(m, "# Snapshot commands:\n");
4475 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4476 		show_snapshot_main_help(m);
4477 	else
4478 		show_snapshot_percpu_help(m);
4479 }
4480 #else
4481 /* Should never be called */
4482 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4483 #endif
4484 
4485 static int s_show(struct seq_file *m, void *v)
4486 {
4487 	struct trace_iterator *iter = v;
4488 	int ret;
4489 
4490 	if (iter->ent == NULL) {
4491 		if (iter->tr) {
4492 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4493 			seq_puts(m, "#\n");
4494 			test_ftrace_alive(m);
4495 		}
4496 		if (iter->snapshot && trace_empty(iter))
4497 			print_snapshot_help(m, iter);
4498 		else if (iter->trace && iter->trace->print_header)
4499 			iter->trace->print_header(m);
4500 		else
4501 			trace_default_header(m);
4502 
4503 	} else if (iter->leftover) {
4504 		/*
4505 		 * If we filled the seq_file buffer earlier, we
4506 		 * want to just show it now.
4507 		 */
4508 		ret = trace_print_seq(m, &iter->seq);
4509 
4510 		/* ret should this time be zero, but you never know */
4511 		iter->leftover = ret;
4512 
4513 	} else {
4514 		ret = print_trace_line(iter);
4515 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4516 			iter->seq.full = 0;
4517 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4518 		}
4519 		ret = trace_print_seq(m, &iter->seq);
4520 		/*
4521 		 * If we overflow the seq_file buffer, then it will
4522 		 * ask us for this data again at start up.
4523 		 * Use that instead.
4524 		 *  ret is 0 if seq_file write succeeded.
4525 		 *        -1 otherwise.
4526 		 */
4527 		iter->leftover = ret;
4528 	}
4529 
4530 	return 0;
4531 }
4532 
4533 /*
4534  * Should be used after trace_array_get(), trace_types_lock
4535  * ensures that i_cdev was already initialized.
4536  */
4537 static inline int tracing_get_cpu(struct inode *inode)
4538 {
4539 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4540 		return (long)inode->i_cdev - 1;
4541 	return RING_BUFFER_ALL_CPUS;
4542 }
4543 
4544 static const struct seq_operations tracer_seq_ops = {
4545 	.start		= s_start,
4546 	.next		= s_next,
4547 	.stop		= s_stop,
4548 	.show		= s_show,
4549 };
4550 
4551 /*
4552  * Note, as iter itself can be allocated and freed in different
4553  * ways, this function is only used to free its content, and not
4554  * the iterator itself. The only requirement for all the allocations
4555  * is that it must zero all fields (kzalloc), as freeing works with
4556  * either allocated content or NULL.
4557  */
4558 static void free_trace_iter_content(struct trace_iterator *iter)
4559 {
4560 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4561 	if (iter->fmt != static_fmt_buf)
4562 		kfree(iter->fmt);
4563 
4564 	kfree(iter->temp);
4565 	kfree(iter->buffer_iter);
4566 	mutex_destroy(&iter->mutex);
4567 	free_cpumask_var(iter->started);
4568 }
4569 
4570 static struct trace_iterator *
4571 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4572 {
4573 	struct trace_array *tr = inode->i_private;
4574 	struct trace_iterator *iter;
4575 	int cpu;
4576 
4577 	if (tracing_disabled)
4578 		return ERR_PTR(-ENODEV);
4579 
4580 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4581 	if (!iter)
4582 		return ERR_PTR(-ENOMEM);
4583 
4584 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4585 				    GFP_KERNEL);
4586 	if (!iter->buffer_iter)
4587 		goto release;
4588 
4589 	/*
4590 	 * trace_find_next_entry() may need to save off iter->ent.
4591 	 * It will place it into the iter->temp buffer. As most
4592 	 * events are less than 128, allocate a buffer of that size.
4593 	 * If one is greater, then trace_find_next_entry() will
4594 	 * allocate a new buffer to adjust for the bigger iter->ent.
4595 	 * It's not critical if it fails to get allocated here.
4596 	 */
4597 	iter->temp = kmalloc(128, GFP_KERNEL);
4598 	if (iter->temp)
4599 		iter->temp_size = 128;
4600 
4601 	/*
4602 	 * trace_event_printf() may need to modify the given format
4603 	 * string to replace %p with %px so that it shows the real address
4604 	 * instead of a hash value. However, that is only needed for event
4605 	 * tracing; other tracers may not need it. Defer the allocation
4606 	 * until it is needed.
4607 	 */
4608 	iter->fmt = NULL;
4609 	iter->fmt_size = 0;
4610 
4611 	mutex_lock(&trace_types_lock);
4612 	iter->trace = tr->current_trace;
4613 
4614 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4615 		goto fail;
4616 
4617 	iter->tr = tr;
4618 
4619 #ifdef CONFIG_TRACER_MAX_TRACE
4620 	/* Currently only the top directory has a snapshot */
4621 	if (tr->current_trace->print_max || snapshot)
4622 		iter->array_buffer = &tr->max_buffer;
4623 	else
4624 #endif
4625 		iter->array_buffer = &tr->array_buffer;
4626 	iter->snapshot = snapshot;
4627 	iter->pos = -1;
4628 	iter->cpu_file = tracing_get_cpu(inode);
4629 	mutex_init(&iter->mutex);
4630 
4631 	/* Notify the tracer early; before we stop tracing. */
4632 	if (iter->trace->open)
4633 		iter->trace->open(iter);
4634 
4635 	/* Annotate start of buffers if we had overruns */
4636 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4637 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4638 
4639 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4640 	if (trace_clocks[tr->clock_id].in_ns)
4641 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4642 
4643 	/*
4644 	 * If pause-on-trace is enabled, then stop the trace while
4645 	 * dumping, unless this is the "snapshot" file.
4646 	 */
4647 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4648 		tracing_stop_tr(tr);
4649 
4650 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4651 		for_each_tracing_cpu(cpu) {
4652 			iter->buffer_iter[cpu] =
4653 				ring_buffer_read_start(iter->array_buffer->buffer,
4654 						       cpu, GFP_KERNEL);
4655 			tracing_iter_reset(iter, cpu);
4656 		}
4657 	} else {
4658 		cpu = iter->cpu_file;
4659 		iter->buffer_iter[cpu] =
4660 			ring_buffer_read_start(iter->array_buffer->buffer,
4661 					       cpu, GFP_KERNEL);
4662 		tracing_iter_reset(iter, cpu);
4663 	}
4664 
4665 	mutex_unlock(&trace_types_lock);
4666 
4667 	return iter;
4668 
4669  fail:
4670 	mutex_unlock(&trace_types_lock);
4671 	free_trace_iter_content(iter);
4672 release:
4673 	seq_release_private(inode, file);
4674 	return ERR_PTR(-ENOMEM);
4675 }
4676 
4677 int tracing_open_generic(struct inode *inode, struct file *filp)
4678 {
4679 	int ret;
4680 
4681 	ret = tracing_check_open_get_tr(NULL);
4682 	if (ret)
4683 		return ret;
4684 
4685 	filp->private_data = inode->i_private;
4686 	return 0;
4687 }
4688 
4689 bool tracing_is_disabled(void)
4690 {
4691 	return (tracing_disabled) ? true: false;
4692 }
4693 
4694 /*
4695  * Open and update trace_array ref count.
4696  * Must have the current trace_array passed to it.
4697  */
4698 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4699 {
4700 	struct trace_array *tr = inode->i_private;
4701 	int ret;
4702 
4703 	ret = tracing_check_open_get_tr(tr);
4704 	if (ret)
4705 		return ret;
4706 
4707 	filp->private_data = inode->i_private;
4708 
4709 	return 0;
4710 }
4711 
4712 /*
4713  * The private pointer of the inode is the trace_event_file.
4714  * Update the tr ref count associated to it.
4715  */
4716 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4717 {
4718 	struct trace_event_file *file = inode->i_private;
4719 	int ret;
4720 
4721 	ret = tracing_check_open_get_tr(file->tr);
4722 	if (ret)
4723 		return ret;
4724 
4725 	mutex_lock(&event_mutex);
4726 
4727 	/* Fail if the file is marked for removal */
4728 	if (file->flags & EVENT_FILE_FL_FREED) {
4729 		trace_array_put(file->tr);
4730 		ret = -ENODEV;
4731 	} else {
4732 		event_file_get(file);
4733 	}
4734 
4735 	mutex_unlock(&event_mutex);
4736 	if (ret)
4737 		return ret;
4738 
4739 	filp->private_data = inode->i_private;
4740 
4741 	return 0;
4742 }
4743 
4744 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4745 {
4746 	struct trace_event_file *file = inode->i_private;
4747 
4748 	trace_array_put(file->tr);
4749 	event_file_put(file);
4750 
4751 	return 0;
4752 }
4753 
4754 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4755 {
4756 	tracing_release_file_tr(inode, filp);
4757 	return single_release(inode, filp);
4758 }
4759 
4760 static int tracing_mark_open(struct inode *inode, struct file *filp)
4761 {
4762 	stream_open(inode, filp);
4763 	return tracing_open_generic_tr(inode, filp);
4764 }
4765 
4766 static int tracing_release(struct inode *inode, struct file *file)
4767 {
4768 	struct trace_array *tr = inode->i_private;
4769 	struct seq_file *m = file->private_data;
4770 	struct trace_iterator *iter;
4771 	int cpu;
4772 
4773 	if (!(file->f_mode & FMODE_READ)) {
4774 		trace_array_put(tr);
4775 		return 0;
4776 	}
4777 
4778 	/* Writes do not use seq_file */
4779 	iter = m->private;
4780 	mutex_lock(&trace_types_lock);
4781 
4782 	for_each_tracing_cpu(cpu) {
4783 		if (iter->buffer_iter[cpu])
4784 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4785 	}
4786 
4787 	if (iter->trace && iter->trace->close)
4788 		iter->trace->close(iter);
4789 
4790 	if (!iter->snapshot && tr->stop_count)
4791 		/* reenable tracing if it was previously enabled */
4792 		tracing_start_tr(tr);
4793 
4794 	__trace_array_put(tr);
4795 
4796 	mutex_unlock(&trace_types_lock);
4797 
4798 	free_trace_iter_content(iter);
4799 	seq_release_private(inode, file);
4800 
4801 	return 0;
4802 }
4803 
4804 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4805 {
4806 	struct trace_array *tr = inode->i_private;
4807 
4808 	trace_array_put(tr);
4809 	return 0;
4810 }
4811 
4812 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4813 {
4814 	struct trace_array *tr = inode->i_private;
4815 
4816 	trace_array_put(tr);
4817 
4818 	return single_release(inode, file);
4819 }
4820 
4821 static int tracing_open(struct inode *inode, struct file *file)
4822 {
4823 	struct trace_array *tr = inode->i_private;
4824 	struct trace_iterator *iter;
4825 	int ret;
4826 
4827 	ret = tracing_check_open_get_tr(tr);
4828 	if (ret)
4829 		return ret;
4830 
4831 	/* If this file was open for write, then erase contents */
4832 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4833 		int cpu = tracing_get_cpu(inode);
4834 		struct array_buffer *trace_buf = &tr->array_buffer;
4835 
4836 #ifdef CONFIG_TRACER_MAX_TRACE
4837 		if (tr->current_trace->print_max)
4838 			trace_buf = &tr->max_buffer;
4839 #endif
4840 
4841 		if (cpu == RING_BUFFER_ALL_CPUS)
4842 			tracing_reset_online_cpus(trace_buf);
4843 		else
4844 			tracing_reset_cpu(trace_buf, cpu);
4845 	}
4846 
4847 	if (file->f_mode & FMODE_READ) {
4848 		iter = __tracing_open(inode, file, false);
4849 		if (IS_ERR(iter))
4850 			ret = PTR_ERR(iter);
4851 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4852 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4853 	}
4854 
4855 	if (ret < 0)
4856 		trace_array_put(tr);
4857 
4858 	return ret;
4859 }
4860 
4861 /*
4862  * Some tracers are not suitable for instance buffers.
4863  * A tracer is always available for the global array (toplevel)
4864  * or if it explicitly states that it is.
4865  */
4866 static bool
4867 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4868 {
4869 #ifdef CONFIG_TRACER_SNAPSHOT
4870 	/* arrays with mapped buffer range do not have snapshots */
4871 	if (tr->range_addr_start && t->use_max_tr)
4872 		return false;
4873 #endif
4874 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4875 }
4876 
4877 /* Find the next tracer that this trace array may use */
4878 static struct tracer *
4879 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4880 {
4881 	while (t && !trace_ok_for_array(t, tr))
4882 		t = t->next;
4883 
4884 	return t;
4885 }
4886 
4887 static void *
4888 t_next(struct seq_file *m, void *v, loff_t *pos)
4889 {
4890 	struct trace_array *tr = m->private;
4891 	struct tracer *t = v;
4892 
4893 	(*pos)++;
4894 
4895 	if (t)
4896 		t = get_tracer_for_array(tr, t->next);
4897 
4898 	return t;
4899 }
4900 
4901 static void *t_start(struct seq_file *m, loff_t *pos)
4902 {
4903 	struct trace_array *tr = m->private;
4904 	struct tracer *t;
4905 	loff_t l = 0;
4906 
4907 	mutex_lock(&trace_types_lock);
4908 
4909 	t = get_tracer_for_array(tr, trace_types);
4910 	for (; t && l < *pos; t = t_next(m, t, &l))
4911 			;
4912 
4913 	return t;
4914 }
4915 
4916 static void t_stop(struct seq_file *m, void *p)
4917 {
4918 	mutex_unlock(&trace_types_lock);
4919 }
4920 
4921 static int t_show(struct seq_file *m, void *v)
4922 {
4923 	struct tracer *t = v;
4924 
4925 	if (!t)
4926 		return 0;
4927 
4928 	seq_puts(m, t->name);
4929 	if (t->next)
4930 		seq_putc(m, ' ');
4931 	else
4932 		seq_putc(m, '\n');
4933 
4934 	return 0;
4935 }
4936 
4937 static const struct seq_operations show_traces_seq_ops = {
4938 	.start		= t_start,
4939 	.next		= t_next,
4940 	.stop		= t_stop,
4941 	.show		= t_show,
4942 };
4943 
4944 static int show_traces_open(struct inode *inode, struct file *file)
4945 {
4946 	struct trace_array *tr = inode->i_private;
4947 	struct seq_file *m;
4948 	int ret;
4949 
4950 	ret = tracing_check_open_get_tr(tr);
4951 	if (ret)
4952 		return ret;
4953 
4954 	ret = seq_open(file, &show_traces_seq_ops);
4955 	if (ret) {
4956 		trace_array_put(tr);
4957 		return ret;
4958 	}
4959 
4960 	m = file->private_data;
4961 	m->private = tr;
4962 
4963 	return 0;
4964 }
4965 
4966 static int tracing_seq_release(struct inode *inode, struct file *file)
4967 {
4968 	struct trace_array *tr = inode->i_private;
4969 
4970 	trace_array_put(tr);
4971 	return seq_release(inode, file);
4972 }
4973 
4974 static ssize_t
4975 tracing_write_stub(struct file *filp, const char __user *ubuf,
4976 		   size_t count, loff_t *ppos)
4977 {
4978 	return count;
4979 }
4980 
4981 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4982 {
4983 	int ret;
4984 
4985 	if (file->f_mode & FMODE_READ)
4986 		ret = seq_lseek(file, offset, whence);
4987 	else
4988 		file->f_pos = ret = 0;
4989 
4990 	return ret;
4991 }
4992 
4993 static const struct file_operations tracing_fops = {
4994 	.open		= tracing_open,
4995 	.read		= seq_read,
4996 	.read_iter	= seq_read_iter,
4997 	.splice_read	= copy_splice_read,
4998 	.write		= tracing_write_stub,
4999 	.llseek		= tracing_lseek,
5000 	.release	= tracing_release,
5001 };
5002 
5003 static const struct file_operations show_traces_fops = {
5004 	.open		= show_traces_open,
5005 	.read		= seq_read,
5006 	.llseek		= seq_lseek,
5007 	.release	= tracing_seq_release,
5008 };
5009 
5010 static ssize_t
5011 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5012 		     size_t count, loff_t *ppos)
5013 {
5014 	struct trace_array *tr = file_inode(filp)->i_private;
5015 	char *mask_str;
5016 	int len;
5017 
5018 	len = snprintf(NULL, 0, "%*pb\n",
5019 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5020 	mask_str = kmalloc(len, GFP_KERNEL);
5021 	if (!mask_str)
5022 		return -ENOMEM;
5023 
5024 	len = snprintf(mask_str, len, "%*pb\n",
5025 		       cpumask_pr_args(tr->tracing_cpumask));
5026 	if (len >= count) {
5027 		count = -EINVAL;
5028 		goto out_err;
5029 	}
5030 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5031 
5032 out_err:
5033 	kfree(mask_str);
5034 
5035 	return count;
5036 }
5037 
5038 int tracing_set_cpumask(struct trace_array *tr,
5039 			cpumask_var_t tracing_cpumask_new)
5040 {
5041 	int cpu;
5042 
5043 	if (!tr)
5044 		return -EINVAL;
5045 
5046 	local_irq_disable();
5047 	arch_spin_lock(&tr->max_lock);
5048 	for_each_tracing_cpu(cpu) {
5049 		/*
5050 		 * Increase/decrease the disabled counter if we are
5051 		 * about to flip a bit in the cpumask:
5052 		 */
5053 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5054 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5055 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5056 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5057 #ifdef CONFIG_TRACER_MAX_TRACE
5058 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5059 #endif
5060 		}
5061 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5062 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5063 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5064 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5065 #ifdef CONFIG_TRACER_MAX_TRACE
5066 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5067 #endif
5068 		}
5069 	}
5070 	arch_spin_unlock(&tr->max_lock);
5071 	local_irq_enable();
5072 
5073 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5074 
5075 	return 0;
5076 }
5077 
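/*
 * Editorial example: an in-kernel caller (the boot-time tracer does
 * something similar in kernel/trace/trace_boot.c) can restrict an instance
 * to a subset of CPUs roughly like this; "tr" is assumed to be a valid
 * instance and error handling is trimmed:
 *
 *	cpumask_var_t new_mask;
 *
 *	if (!zalloc_cpumask_var(&new_mask, GFP_KERNEL))
 *		return;
 *	cpumask_set_cpu(0, new_mask);
 *	cpumask_set_cpu(1, new_mask);
 *	tracing_set_cpumask(tr, new_mask);
 *	free_cpumask_var(new_mask);
 *
 * tracing_set_cpumask() copies the mask into tr->tracing_cpumask, so the
 * caller keeps ownership of new_mask and must free it.
 */
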
5078 static ssize_t
5079 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5080 		      size_t count, loff_t *ppos)
5081 {
5082 	struct trace_array *tr = file_inode(filp)->i_private;
5083 	cpumask_var_t tracing_cpumask_new;
5084 	int err;
5085 
5086 	if (count == 0 || count > KMALLOC_MAX_SIZE)
5087 		return -EINVAL;
5088 
5089 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5090 		return -ENOMEM;
5091 
5092 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5093 	if (err)
5094 		goto err_free;
5095 
5096 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5097 	if (err)
5098 		goto err_free;
5099 
5100 	free_cpumask_var(tracing_cpumask_new);
5101 
5102 	return count;
5103 
5104 err_free:
5105 	free_cpumask_var(tracing_cpumask_new);
5106 
5107 	return err;
5108 }
5109 
5110 static const struct file_operations tracing_cpumask_fops = {
5111 	.open		= tracing_open_generic_tr,
5112 	.read		= tracing_cpumask_read,
5113 	.write		= tracing_cpumask_write,
5114 	.release	= tracing_release_generic_tr,
5115 	.llseek		= generic_file_llseek,
5116 };
5117 
5118 static int tracing_trace_options_show(struct seq_file *m, void *v)
5119 {
5120 	struct tracer_opt *trace_opts;
5121 	struct trace_array *tr = m->private;
5122 	u32 tracer_flags;
5123 	int i;
5124 
5125 	guard(mutex)(&trace_types_lock);
5126 
5127 	tracer_flags = tr->current_trace->flags->val;
5128 	trace_opts = tr->current_trace->flags->opts;
5129 
5130 	for (i = 0; trace_options[i]; i++) {
5131 		if (tr->trace_flags & (1 << i))
5132 			seq_printf(m, "%s\n", trace_options[i]);
5133 		else
5134 			seq_printf(m, "no%s\n", trace_options[i]);
5135 	}
5136 
5137 	for (i = 0; trace_opts[i].name; i++) {
5138 		if (tracer_flags & trace_opts[i].bit)
5139 			seq_printf(m, "%s\n", trace_opts[i].name);
5140 		else
5141 			seq_printf(m, "no%s\n", trace_opts[i].name);
5142 	}
5143 
5144 	return 0;
5145 }
5146 
5147 static int __set_tracer_option(struct trace_array *tr,
5148 			       struct tracer_flags *tracer_flags,
5149 			       struct tracer_opt *opts, int neg)
5150 {
5151 	struct tracer *trace = tracer_flags->trace;
5152 	int ret;
5153 
5154 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5155 	if (ret)
5156 		return ret;
5157 
5158 	if (neg)
5159 		tracer_flags->val &= ~opts->bit;
5160 	else
5161 		tracer_flags->val |= opts->bit;
5162 	return 0;
5163 }
5164 
5165 /* Try to assign a tracer specific option */
5166 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5167 {
5168 	struct tracer *trace = tr->current_trace;
5169 	struct tracer_flags *tracer_flags = trace->flags;
5170 	struct tracer_opt *opts = NULL;
5171 	int i;
5172 
5173 	for (i = 0; tracer_flags->opts[i].name; i++) {
5174 		opts = &tracer_flags->opts[i];
5175 
5176 		if (strcmp(cmp, opts->name) == 0)
5177 			return __set_tracer_option(tr, trace->flags, opts, neg);
5178 	}
5179 
5180 	return -EINVAL;
5181 }
5182 
5183 /* Some tracers require overwrite to stay enabled */
5184 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5185 {
5186 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5187 		return -1;
5188 
5189 	return 0;
5190 }
5191 
5192 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5193 {
5194 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5195 	    (mask == TRACE_ITER_RECORD_CMD) ||
5196 	    (mask == TRACE_ITER_TRACE_PRINTK))
5197 		lockdep_assert_held(&event_mutex);
5198 
5199 	/* do nothing if flag is already set */
5200 	if (!!(tr->trace_flags & mask) == !!enabled)
5201 		return 0;
5202 
5203 	/* Give the tracer a chance to approve the change */
5204 	if (tr->current_trace->flag_changed)
5205 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5206 			return -EINVAL;
5207 
5208 	if (mask == TRACE_ITER_TRACE_PRINTK) {
5209 		if (enabled) {
5210 			update_printk_trace(tr);
5211 		} else {
5212 			/*
5213 			 * The global_trace cannot clear this.
5214 			 * Its flag only gets cleared if another instance sets it.
5215 			 */
5216 			if (printk_trace == &global_trace)
5217 				return -EINVAL;
5218 			/*
5219 			 * An instance must always have it set;
5220 			 * by default, that's the global_trace instance.
5221 			 */
5222 			if (printk_trace == tr)
5223 				update_printk_trace(&global_trace);
5224 		}
5225 	}
5226 
5227 	if (enabled)
5228 		tr->trace_flags |= mask;
5229 	else
5230 		tr->trace_flags &= ~mask;
5231 
5232 	if (mask == TRACE_ITER_RECORD_CMD)
5233 		trace_event_enable_cmd_record(enabled);
5234 
5235 	if (mask == TRACE_ITER_RECORD_TGID) {
5236 
5237 		if (trace_alloc_tgid_map() < 0) {
5238 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5239 			return -ENOMEM;
5240 		}
5241 
5242 		trace_event_enable_tgid_record(enabled);
5243 	}
5244 
5245 	if (mask == TRACE_ITER_EVENT_FORK)
5246 		trace_event_follow_fork(tr, enabled);
5247 
5248 	if (mask == TRACE_ITER_FUNC_FORK)
5249 		ftrace_pid_follow_fork(tr, enabled);
5250 
5251 	if (mask == TRACE_ITER_OVERWRITE) {
5252 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5253 #ifdef CONFIG_TRACER_MAX_TRACE
5254 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5255 #endif
5256 	}
5257 
5258 	if (mask == TRACE_ITER_PRINTK) {
5259 		trace_printk_start_stop_comm(enabled);
5260 		trace_printk_control(enabled);
5261 	}
5262 
5263 	return 0;
5264 }
5265 
5266 int trace_set_options(struct trace_array *tr, char *option)
5267 {
5268 	char *cmp;
5269 	int neg = 0;
5270 	int ret;
5271 	size_t orig_len = strlen(option);
5272 	int len;
5273 
5274 	cmp = strstrip(option);
5275 
5276 	len = str_has_prefix(cmp, "no");
5277 	if (len)
5278 		neg = 1;
5279 
5280 	cmp += len;
5281 
5282 	mutex_lock(&event_mutex);
5283 	mutex_lock(&trace_types_lock);
5284 
5285 	ret = match_string(trace_options, -1, cmp);
5286 	/* If no option could be set, test the specific tracer options */
5287 	if (ret < 0)
5288 		ret = set_tracer_option(tr, cmp, neg);
5289 	else
5290 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5291 
5292 	mutex_unlock(&trace_types_lock);
5293 	mutex_unlock(&event_mutex);
5294 
5295 	/*
5296 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5297 	 * turn it back into a space.
5298 	 */
5299 	if (orig_len > strlen(option))
5300 		option[strlen(option)] = ' ';
5301 
5302 	return ret;
5303 }
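/*
 * Illustrative usage (not part of the original source): trace_set_options()
 * is what ultimately services writes to the trace_options file, so the
 * generic flags can be toggled from a shell with, e.g.:
 *
 *	echo print-parent  > /sys/kernel/tracing/trace_options
 *	echo nosym-offset  > /sys/kernel/tracing/trace_options
 *
 * A "no" prefix clears a flag; names that are not in trace_options[] fall
 * through to set_tracer_option() for the current tracer.
 */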
5304 
5305 static void __init apply_trace_boot_options(void)
5306 {
5307 	char *buf = trace_boot_options_buf;
5308 	char *option;
5309 
5310 	while (true) {
5311 		option = strsep(&buf, ",");
5312 
5313 		if (!option)
5314 			break;
5315 
5316 		if (*option)
5317 			trace_set_options(&global_trace, option);
5318 
5319 		/* Put back the comma to allow this to be called again */
5320 		if (buf)
5321 			*(buf - 1) = ',';
5322 	}
5323 }
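/*
 * Example (illustrative only): trace_boot_options_buf is filled from the
 * "trace_options=" kernel command line parameter, e.g.:
 *
 *	trace_options=sym-offset,noirq-info
 *
 * Each comma-separated token is passed to trace_set_options() for the
 * global trace instance, and the commas are put back so the buffer can be
 * parsed again on a later call.
 */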
5324 
5325 static ssize_t
5326 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5327 			size_t cnt, loff_t *ppos)
5328 {
5329 	struct seq_file *m = filp->private_data;
5330 	struct trace_array *tr = m->private;
5331 	char buf[64];
5332 	int ret;
5333 
5334 	if (cnt >= sizeof(buf))
5335 		return -EINVAL;
5336 
5337 	if (copy_from_user(buf, ubuf, cnt))
5338 		return -EFAULT;
5339 
5340 	buf[cnt] = 0;
5341 
5342 	ret = trace_set_options(tr, buf);
5343 	if (ret < 0)
5344 		return ret;
5345 
5346 	*ppos += cnt;
5347 
5348 	return cnt;
5349 }
5350 
5351 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5352 {
5353 	struct trace_array *tr = inode->i_private;
5354 	int ret;
5355 
5356 	ret = tracing_check_open_get_tr(tr);
5357 	if (ret)
5358 		return ret;
5359 
5360 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5361 	if (ret < 0)
5362 		trace_array_put(tr);
5363 
5364 	return ret;
5365 }
5366 
5367 static const struct file_operations tracing_iter_fops = {
5368 	.open		= tracing_trace_options_open,
5369 	.read		= seq_read,
5370 	.llseek		= seq_lseek,
5371 	.release	= tracing_single_release_tr,
5372 	.write		= tracing_trace_options_write,
5373 };
5374 
5375 static const char readme_msg[] =
5376 	"tracing mini-HOWTO:\n\n"
5377 	"By default tracefs removes all OTH file permission bits.\n"
5378 	"When mounting tracefs an optional group id can be specified\n"
5379 	"which adds the group to every directory and file in tracefs:\n\n"
5380 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5381 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5382 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5383 	" Important files:\n"
5384 	"  trace\t\t\t- The static contents of the buffer\n"
5385 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5386 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5387 	"  current_tracer\t- function and latency tracers\n"
5388 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5389 	"  error_log\t- error log for failed commands (that support it)\n"
5390 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5391 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5392 	"  trace_clock\t\t- change the clock used to order events\n"
5393 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5394 	"      global:   Synced across CPUs but slows tracing down.\n"
5395 	"     counter:   Not a clock, but just an increment\n"
5396 	"      uptime:   Jiffy counter from time of boot\n"
5397 	"        perf:   Same clock that perf events use\n"
5398 #ifdef CONFIG_X86_64
5399 	"     x86-tsc:   TSC cycle counter\n"
5400 #endif
5401 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5402 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5403 	"    absolute:   Absolute (standalone) timestamp\n"
5404 	"\n  trace_marker\t\t- Writes into this file write into the kernel buffer\n"
5405 	"\n  trace_marker_raw\t\t- Writes into this file write binary data into the kernel buffer\n"
5406 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5407 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5408 	"\t\t\t  Remove sub-buffer with rmdir\n"
5409 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5410 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5411 	"\t\t\t  option name\n"
5412 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5413 #ifdef CONFIG_DYNAMIC_FTRACE
5414 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5415 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5416 	"\t\t\t  functions\n"
5417 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5418 	"\t     modules: Can select a group via module\n"
5419 	"\t      Format: :mod:<module-name>\n"
5420 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5421 	"\t    triggers: a command to perform when function is hit\n"
5422 	"\t      Format: <function>:<trigger>[:count]\n"
5423 	"\t     trigger: traceon, traceoff\n"
5424 	"\t\t      enable_event:<system>:<event>\n"
5425 	"\t\t      disable_event:<system>:<event>\n"
5426 #ifdef CONFIG_STACKTRACE
5427 	"\t\t      stacktrace\n"
5428 #endif
5429 #ifdef CONFIG_TRACER_SNAPSHOT
5430 	"\t\t      snapshot\n"
5431 #endif
5432 	"\t\t      dump\n"
5433 	"\t\t      cpudump\n"
5434 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5435 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5436 	"\t     The first one will disable tracing every time do_fault is hit\n"
5437 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5438 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5439 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5440 	"\t       the counter will not decrement. It only decrements when the\n"
5441 	"\t       trigger did work\n"
5442 	"\t     To remove trigger without count:\n"
5443 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5444 	"\t     To remove trigger with a count:\n"
5445 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5446 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5447 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5448 	"\t    modules: Can select a group via module command :mod:\n"
5449 	"\t    Does not accept triggers\n"
5450 #endif /* CONFIG_DYNAMIC_FTRACE */
5451 #ifdef CONFIG_FUNCTION_TRACER
5452 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5453 	"\t\t    (function)\n"
5454 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5455 	"\t\t    (function)\n"
5456 #endif
5457 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5458 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5459 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5460 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5461 #endif
5462 #ifdef CONFIG_TRACER_SNAPSHOT
5463 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5464 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5465 	"\t\t\t  information\n"
5466 #endif
5467 #ifdef CONFIG_STACK_TRACER
5468 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5469 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5470 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5471 	"\t\t\t  new trace)\n"
5472 #ifdef CONFIG_DYNAMIC_FTRACE
5473 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5474 	"\t\t\t  traces\n"
5475 #endif
5476 #endif /* CONFIG_STACK_TRACER */
5477 #ifdef CONFIG_DYNAMIC_EVENTS
5478 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5479 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5480 #endif
5481 #ifdef CONFIG_KPROBE_EVENTS
5482 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5483 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5484 #endif
5485 #ifdef CONFIG_UPROBE_EVENTS
5486 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5487 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5488 #endif
5489 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5490     defined(CONFIG_FPROBE_EVENTS)
5491 	"\t  accepts: event-definitions (one definition per line)\n"
5492 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5493 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5494 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5495 #endif
5496 #ifdef CONFIG_FPROBE_EVENTS
5497 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5498 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5499 #endif
5500 #ifdef CONFIG_HIST_TRIGGERS
5501 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5502 #endif
5503 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5504 	"\t           -:[<group>/][<event>]\n"
5505 #ifdef CONFIG_KPROBE_EVENTS
5506 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5507   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5508 #endif
5509 #ifdef CONFIG_UPROBE_EVENTS
5510   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5511 #endif
5512 	"\t     args: <name>=fetcharg[:type]\n"
5513 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5514 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5515 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5516 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5517 	"\t           <argname>[->field[->field|.field...]],\n"
5518 #endif
5519 #else
5520 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5521 #endif
5522 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5523 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5524 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5525 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5526 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5527 #ifdef CONFIG_HIST_TRIGGERS
5528 	"\t    field: <stype> <name>;\n"
5529 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5530 	"\t           [unsigned] char/int/long\n"
5531 #endif
5532 	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
5533 	"\t            of the <attached-group>/<attached-event>.\n"
5534 #endif
5535 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5536 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5537 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5538 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5539 	"\t\t\t  events\n"
5540 	"      filter\t\t- If set, only events passing filter are traced\n"
5541 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5542 	"\t\t\t  <event>:\n"
5543 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5544 	"      filter\t\t- If set, only events passing filter are traced\n"
5545 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5546 	"\t    Format: <trigger>[:count][if <filter>]\n"
5547 	"\t   trigger: traceon, traceoff\n"
5548 	"\t            enable_event:<system>:<event>\n"
5549 	"\t            disable_event:<system>:<event>\n"
5550 #ifdef CONFIG_HIST_TRIGGERS
5551 	"\t            enable_hist:<system>:<event>\n"
5552 	"\t            disable_hist:<system>:<event>\n"
5553 #endif
5554 #ifdef CONFIG_STACKTRACE
5555 	"\t\t    stacktrace\n"
5556 #endif
5557 #ifdef CONFIG_TRACER_SNAPSHOT
5558 	"\t\t    snapshot\n"
5559 #endif
5560 #ifdef CONFIG_HIST_TRIGGERS
5561 	"\t\t    hist (see below)\n"
5562 #endif
5563 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5564 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5565 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5566 	"\t                  events/block/block_unplug/trigger\n"
5567 	"\t   The first disables tracing every time block_unplug is hit.\n"
5568 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5569 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5570 	"\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5571 	"\t   Like function triggers, the counter is only decremented if it\n"
5572 	"\t    enabled or disabled tracing.\n"
5573 	"\t   To remove a trigger without a count:\n"
5574 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5575 	"\t   To remove a trigger with a count:\n"
5576 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5577 	"\t   Filters can be ignored when removing a trigger.\n"
5578 #ifdef CONFIG_HIST_TRIGGERS
5579 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5580 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5581 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5582 	"\t            [:values=<field1[,field2,...]>]\n"
5583 	"\t            [:sort=<field1[,field2,...]>]\n"
5584 	"\t            [:size=#entries]\n"
5585 	"\t            [:pause][:continue][:clear]\n"
5586 	"\t            [:name=histname1]\n"
5587 	"\t            [:nohitcount]\n"
5588 	"\t            [:<handler>.<action>]\n"
5589 	"\t            [if <filter>]\n\n"
5590 	"\t    Note, special fields can be used as well:\n"
5591 	"\t            common_timestamp - to record current timestamp\n"
5592 	"\t            common_cpu - to record the CPU the event happened on\n"
5593 	"\n"
5594 	"\t    A hist trigger variable can be:\n"
5595 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5596 	"\t        - a reference to another variable e.g. y=$x,\n"
5597 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5598 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5599 	"\n"
5600 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5601 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5602 	"\t    variable reference, field or numeric literal.\n"
5603 	"\n"
5604 	"\t    When a matching event is hit, an entry is added to a hash\n"
5605 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5606 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5607 	"\t    correspond to fields in the event's format description.  Keys\n"
5608 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5609 	"\t    Compound keys consisting of up to two fields can be specified\n"
5610 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5611 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5612 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5613 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5614 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5615 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5616 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5617 	"\t    its histogram data will be shared with other triggers of the\n"
5618 	"\t    same name, and trigger hits will update this common data.\n\n"
5619 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5620 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5621 	"\t    triggers attached to an event, there will be a table for each\n"
5622 	"\t    trigger in the output.  The table displayed for a named\n"
5623 	"\t    trigger will be the same as any other instance having the\n"
5624 	"\t    same name.  The default format used to display a given field\n"
5625 	"\t    can be modified by appending any of the following modifiers\n"
5626 	"\t    to the field name, as applicable:\n\n"
5627 	"\t            .hex        display a number as a hex value\n"
5628 	"\t            .sym        display an address as a symbol\n"
5629 	"\t            .sym-offset display an address as a symbol and offset\n"
5630 	"\t            .execname   display a common_pid as a program name\n"
5631 	"\t            .syscall    display a syscall id as a syscall name\n"
5632 	"\t            .log2       display log2 value rather than raw number\n"
5633 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5634 	"\t            .usecs      display a common_timestamp in microseconds\n"
5635 	"\t            .percent    display a number as a percentage value\n"
5636 	"\t            .graph      display a bar-graph of a value\n\n"
5637 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5638 	"\t    trigger or to start a hist trigger but not log any events\n"
5639 	"\t    until told to do so.  'continue' can be used to start or\n"
5640 	"\t    restart a paused hist trigger.\n\n"
5641 	"\t    The 'clear' parameter will clear the contents of a running\n"
5642 	"\t    hist trigger and leave its current paused/active state\n"
5643 	"\t    unchanged.\n\n"
5644 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5645 	"\t    raw hitcount in the histogram.\n\n"
5646 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5647 	"\t    have one event conditionally start and stop another event's\n"
5648 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5649 	"\t    the enable_event and disable_event triggers.\n\n"
5650 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5651 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5652 	"\t        <handler>.<action>\n\n"
5653 	"\t    The available handlers are:\n\n"
5654 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5655 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5656 	"\t        onchange(var)            - invoke action if var changes\n\n"
5657 	"\t    The available actions are:\n\n"
5658 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5659 	"\t        save(field,...)                      - save current event fields\n"
5660 #ifdef CONFIG_TRACER_SNAPSHOT
5661 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5662 #endif
5663 #ifdef CONFIG_SYNTH_EVENTS
5664 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5665 	"\t  Write into this file to define/undefine new synthetic events.\n"
5666 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5667 #endif
5668 #endif
5669 ;
5670 
5671 static ssize_t
5672 tracing_readme_read(struct file *filp, char __user *ubuf,
5673 		       size_t cnt, loff_t *ppos)
5674 {
5675 	return simple_read_from_buffer(ubuf, cnt, ppos,
5676 					readme_msg, strlen(readme_msg));
5677 }
5678 
5679 static const struct file_operations tracing_readme_fops = {
5680 	.open		= tracing_open_generic,
5681 	.read		= tracing_readme_read,
5682 	.llseek		= generic_file_llseek,
5683 };
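/*
 * Quick way to view the mini-HOWTO above from a shell (illustrative
 * example, assuming tracefs is mounted at its usual location):
 *
 *	cat /sys/kernel/tracing/README
 */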
5684 
5685 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5686 static union trace_eval_map_item *
5687 update_eval_map(union trace_eval_map_item *ptr)
5688 {
5689 	if (!ptr->map.eval_string) {
5690 		if (ptr->tail.next) {
5691 			ptr = ptr->tail.next;
5692 			/* Set ptr to the next real item (skip head) */
5693 			ptr++;
5694 		} else
5695 			return NULL;
5696 	}
5697 	return ptr;
5698 }
5699 
5700 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5701 {
5702 	union trace_eval_map_item *ptr = v;
5703 
5704 	/*
5705 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5706 	 * This really should never happen.
5707 	 */
5708 	(*pos)++;
5709 	ptr = update_eval_map(ptr);
5710 	if (WARN_ON_ONCE(!ptr))
5711 		return NULL;
5712 
5713 	ptr++;
5714 	ptr = update_eval_map(ptr);
5715 
5716 	return ptr;
5717 }
5718 
5719 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5720 {
5721 	union trace_eval_map_item *v;
5722 	loff_t l = 0;
5723 
5724 	mutex_lock(&trace_eval_mutex);
5725 
5726 	v = trace_eval_maps;
5727 	if (v)
5728 		v++;
5729 
5730 	while (v && l < *pos) {
5731 		v = eval_map_next(m, v, &l);
5732 	}
5733 
5734 	return v;
5735 }
5736 
5737 static void eval_map_stop(struct seq_file *m, void *v)
5738 {
5739 	mutex_unlock(&trace_eval_mutex);
5740 }
5741 
5742 static int eval_map_show(struct seq_file *m, void *v)
5743 {
5744 	union trace_eval_map_item *ptr = v;
5745 
5746 	seq_printf(m, "%s %ld (%s)\n",
5747 		   ptr->map.eval_string, ptr->map.eval_value,
5748 		   ptr->map.system);
5749 
5750 	return 0;
5751 }
5752 
5753 static const struct seq_operations tracing_eval_map_seq_ops = {
5754 	.start		= eval_map_start,
5755 	.next		= eval_map_next,
5756 	.stop		= eval_map_stop,
5757 	.show		= eval_map_show,
5758 };
5759 
5760 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5761 {
5762 	int ret;
5763 
5764 	ret = tracing_check_open_get_tr(NULL);
5765 	if (ret)
5766 		return ret;
5767 
5768 	return seq_open(filp, &tracing_eval_map_seq_ops);
5769 }
5770 
5771 static const struct file_operations tracing_eval_map_fops = {
5772 	.open		= tracing_eval_map_open,
5773 	.read		= seq_read,
5774 	.llseek		= seq_lseek,
5775 	.release	= seq_release,
5776 };
5777 
5778 static inline union trace_eval_map_item *
5779 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5780 {
5781 	/* Return tail of array given the head */
5782 	return ptr + ptr->head.length + 1;
5783 }
5784 
5785 static void
5786 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5787 			   int len)
5788 {
5789 	struct trace_eval_map **stop;
5790 	struct trace_eval_map **map;
5791 	union trace_eval_map_item *map_array;
5792 	union trace_eval_map_item *ptr;
5793 
5794 	stop = start + len;
5795 
5796 	/*
5797 	 * The trace_eval_maps contains the map plus a head and tail item,
5798 	 * where the head holds the module and length of array, and the
5799 	 * tail holds a pointer to the next list.
5800 	 */
5801 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5802 	if (!map_array) {
5803 		pr_warn("Unable to allocate trace eval mapping\n");
5804 		return;
5805 	}
5806 
5807 	guard(mutex)(&trace_eval_mutex);
5808 
5809 	if (!trace_eval_maps)
5810 		trace_eval_maps = map_array;
5811 	else {
5812 		ptr = trace_eval_maps;
5813 		for (;;) {
5814 			ptr = trace_eval_jmp_to_tail(ptr);
5815 			if (!ptr->tail.next)
5816 				break;
5817 			ptr = ptr->tail.next;
5818 
5819 		}
5820 		ptr->tail.next = map_array;
5821 	}
5822 	map_array->head.mod = mod;
5823 	map_array->head.length = len;
5824 	map_array++;
5825 
5826 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5827 		map_array->map = **map;
5828 		map_array++;
5829 	}
5830 	memset(map_array, 0, sizeof(*map_array));
5831 }
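/*
 * Rough sketch of the layout built above (informal, derived from the
 * comment on trace_eval_maps): each module contributes a block of
 * (len + 2) items,
 *
 *	[ head(mod, len) | map[0] | map[1] | ... | map[len - 1] | tail ]
 *
 * where tail.next chains the blocks together; trace_eval_jmp_to_tail()
 * walks these tails to find the end of the list before appending.
 */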
5832 
5833 static void trace_create_eval_file(struct dentry *d_tracer)
5834 {
5835 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5836 			  NULL, &tracing_eval_map_fops);
5837 }
5838 
5839 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5840 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5841 static inline void trace_insert_eval_map_file(struct module *mod,
5842 			      struct trace_eval_map **start, int len) { }
5843 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5844 
5845 static void trace_insert_eval_map(struct module *mod,
5846 				  struct trace_eval_map **start, int len)
5847 {
5848 	struct trace_eval_map **map;
5849 
5850 	if (len <= 0)
5851 		return;
5852 
5853 	map = start;
5854 
5855 	trace_event_eval_update(map, len);
5856 
5857 	trace_insert_eval_map_file(mod, start, len);
5858 }
5859 
5860 static ssize_t
5861 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5862 		       size_t cnt, loff_t *ppos)
5863 {
5864 	struct trace_array *tr = filp->private_data;
5865 	char buf[MAX_TRACER_SIZE+2];
5866 	int r;
5867 
5868 	mutex_lock(&trace_types_lock);
5869 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5870 	mutex_unlock(&trace_types_lock);
5871 
5872 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5873 }
5874 
5875 int tracer_init(struct tracer *t, struct trace_array *tr)
5876 {
5877 	tracing_reset_online_cpus(&tr->array_buffer);
5878 	return t->init(tr);
5879 }
5880 
5881 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5882 {
5883 	int cpu;
5884 
5885 	for_each_tracing_cpu(cpu)
5886 		per_cpu_ptr(buf->data, cpu)->entries = val;
5887 }
5888 
5889 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5890 {
5891 	if (cpu == RING_BUFFER_ALL_CPUS) {
5892 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5893 	} else {
5894 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5895 	}
5896 }
5897 
5898 #ifdef CONFIG_TRACER_MAX_TRACE
5899 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5900 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5901 					struct array_buffer *size_buf, int cpu_id)
5902 {
5903 	int cpu, ret = 0;
5904 
5905 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5906 		for_each_tracing_cpu(cpu) {
5907 			ret = ring_buffer_resize(trace_buf->buffer,
5908 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5909 			if (ret < 0)
5910 				break;
5911 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5912 				per_cpu_ptr(size_buf->data, cpu)->entries;
5913 		}
5914 	} else {
5915 		ret = ring_buffer_resize(trace_buf->buffer,
5916 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5917 		if (ret == 0)
5918 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5919 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5920 	}
5921 
5922 	return ret;
5923 }
5924 #endif /* CONFIG_TRACER_MAX_TRACE */
5925 
5926 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5927 					unsigned long size, int cpu)
5928 {
5929 	int ret;
5930 
5931 	/*
5932 	 * If kernel or user changes the size of the ring buffer
5933 	 * we use the size that was given, and we can forget about
5934 	 * expanding it later.
5935 	 */
5936 	trace_set_ring_buffer_expanded(tr);
5937 
5938 	/* May be called before buffers are initialized */
5939 	if (!tr->array_buffer.buffer)
5940 		return 0;
5941 
5942 	/* Do not allow tracing while resizing ring buffer */
5943 	tracing_stop_tr(tr);
5944 
5945 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5946 	if (ret < 0)
5947 		goto out_start;
5948 
5949 #ifdef CONFIG_TRACER_MAX_TRACE
5950 	if (!tr->allocated_snapshot)
5951 		goto out;
5952 
5953 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5954 	if (ret < 0) {
5955 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5956 						     &tr->array_buffer, cpu);
5957 		if (r < 0) {
5958 			/*
5959 			 * AARGH! We are left with different
5960 			 * size max buffer!!!!
5961 			 * The max buffer is our "snapshot" buffer.
5962 			 * When a tracer needs a snapshot (one of the
5963 			 * latency tracers), it swaps the max buffer
5964 			 * with the saved snapshot. We succeeded in
5965 			 * updating the size of the main buffer, but failed to
5966 			 * update the size of the max buffer. But when we tried
5967 			 * to reset the main buffer to the original size, we
5968 			 * failed there too. This is very unlikely to
5969 			 * happen, but if it does, warn and kill all
5970 			 * tracing.
5971 			 */
5972 			WARN_ON(1);
5973 			tracing_disabled = 1;
5974 		}
5975 		goto out_start;
5976 	}
5977 
5978 	update_buffer_entries(&tr->max_buffer, cpu);
5979 
5980  out:
5981 #endif /* CONFIG_TRACER_MAX_TRACE */
5982 
5983 	update_buffer_entries(&tr->array_buffer, cpu);
5984  out_start:
5985 	tracing_start_tr(tr);
5986 	return ret;
5987 }
5988 
5989 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5990 				  unsigned long size, int cpu_id)
5991 {
5992 	guard(mutex)(&trace_types_lock);
5993 
5994 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5995 		/* make sure, this cpu is enabled in the mask */
5996 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
5997 			return -EINVAL;
5998 	}
5999 
6000 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
6001 }
6002 
6003 static void update_last_data(struct trace_array *tr)
6004 {
6005 	if (!tr->text_delta && !tr->data_delta)
6006 		return;
6007 
6008 	/*
6009 	 * Need to clear all CPU buffers as there cannot be events
6010 	 * from the previous boot mixed with events from this boot
6011 	 * as that will cause a confusing trace. Need to clear all
6012 	 * CPU buffers, even for those that may currently be offline.
6013 	 */
6014 	tracing_reset_all_cpus(&tr->array_buffer);
6015 
6016 	/* Using current data now */
6017 	tr->text_delta = 0;
6018 	tr->data_delta = 0;
6019 }
6020 
6021 /**
6022  * tracing_update_buffers - used by tracing facility to expand ring buffers
6023  * @tr: The tracing instance
6024  *
6025  * To save memory when tracing is never used on a system that has it
6026  * configured in, the ring buffers are set to a minimum size. But once
6027  * a user starts to use the tracing facility, they need to grow
6028  * to their default size.
6029  *
6030  * This function is to be called when a tracer is about to be used.
6031  */
6032 int tracing_update_buffers(struct trace_array *tr)
6033 {
6034 	int ret = 0;
6035 
6036 	mutex_lock(&trace_types_lock);
6037 
6038 	update_last_data(tr);
6039 
6040 	if (!tr->ring_buffer_expanded)
6041 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6042 						RING_BUFFER_ALL_CPUS);
6043 	mutex_unlock(&trace_types_lock);
6044 
6045 	return ret;
6046 }
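/*
 * Informal note with an example: until something actually uses tracing,
 * the per-CPU ring buffers are kept at a token size to save memory. The
 * first real user expands them to trace_buf_size, which can itself be
 * tuned on the kernel command line, e.g. (illustrative):
 *
 *	trace_buf_size=10240k
 *
 * Otherwise the default comes from TRACE_BUF_SIZE_DEFAULT in this file.
 */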
6047 
6048 struct trace_option_dentry;
6049 
6050 static void
6051 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6052 
6053 /*
6054  * Used to clear out the tracer before deletion of an instance.
6055  * Must have trace_types_lock held.
6056  */
6057 static void tracing_set_nop(struct trace_array *tr)
6058 {
6059 	if (tr->current_trace == &nop_trace)
6060 		return;
6061 
6062 	tr->current_trace->enabled--;
6063 
6064 	if (tr->current_trace->reset)
6065 		tr->current_trace->reset(tr);
6066 
6067 	tr->current_trace = &nop_trace;
6068 }
6069 
6070 static bool tracer_options_updated;
6071 
6072 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6073 {
6074 	/* Only enable if the directory has been created already. */
6075 	if (!tr->dir)
6076 		return;
6077 
6078 	/* Only create trace option files after update_tracer_options finishes */
6079 	if (!tracer_options_updated)
6080 		return;
6081 
6082 	create_trace_option_files(tr, t);
6083 }
6084 
6085 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6086 {
6087 	struct tracer *t;
6088 #ifdef CONFIG_TRACER_MAX_TRACE
6089 	bool had_max_tr;
6090 #endif
6091 	int ret;
6092 
6093 	guard(mutex)(&trace_types_lock);
6094 
6095 	update_last_data(tr);
6096 
6097 	if (!tr->ring_buffer_expanded) {
6098 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6099 						RING_BUFFER_ALL_CPUS);
6100 		if (ret < 0)
6101 			return ret;
6102 		ret = 0;
6103 	}
6104 
6105 	for (t = trace_types; t; t = t->next) {
6106 		if (strcmp(t->name, buf) == 0)
6107 			break;
6108 	}
6109 	if (!t)
6110 		return -EINVAL;
6111 
6112 	if (t == tr->current_trace)
6113 		return 0;
6114 
6115 #ifdef CONFIG_TRACER_SNAPSHOT
6116 	if (t->use_max_tr) {
6117 		local_irq_disable();
6118 		arch_spin_lock(&tr->max_lock);
6119 		ret = tr->cond_snapshot ? -EBUSY : 0;
6120 		arch_spin_unlock(&tr->max_lock);
6121 		local_irq_enable();
6122 		if (ret)
6123 			return ret;
6124 	}
6125 #endif
6126 	/* Some tracers won't work on kernel command line */
6127 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6128 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6129 			t->name);
6130 		return 0;
6131 	}
6132 
6133 	/* Some tracers are only allowed for the top level buffer */
6134 	if (!trace_ok_for_array(t, tr))
6135 		return -EINVAL;
6136 
6137 	/* If trace pipe files are being read, we can't change the tracer */
6138 	if (tr->trace_ref)
6139 		return -EBUSY;
6140 
6141 	trace_branch_disable();
6142 
6143 	tr->current_trace->enabled--;
6144 
6145 	if (tr->current_trace->reset)
6146 		tr->current_trace->reset(tr);
6147 
6148 #ifdef CONFIG_TRACER_MAX_TRACE
6149 	had_max_tr = tr->current_trace->use_max_tr;
6150 
6151 	/* Current trace needs to be nop_trace before synchronize_rcu */
6152 	tr->current_trace = &nop_trace;
6153 
6154 	if (had_max_tr && !t->use_max_tr) {
6155 		/*
6156 		 * We need to make sure that the update_max_tr sees that
6157 		 * current_trace changed to nop_trace to keep it from
6158 		 * swapping the buffers after we resize it.
6159 		 * The update_max_tr is called with interrupts disabled,
6160 		 * so a synchronize_rcu() is sufficient.
6161 		 */
6162 		synchronize_rcu();
6163 		free_snapshot(tr);
6164 		tracing_disarm_snapshot(tr);
6165 	}
6166 
6167 	if (!had_max_tr && t->use_max_tr) {
6168 		ret = tracing_arm_snapshot_locked(tr);
6169 		if (ret)
6170 			return ret;
6171 	}
6172 #else
6173 	tr->current_trace = &nop_trace;
6174 #endif
6175 
6176 	if (t->init) {
6177 		ret = tracer_init(t, tr);
6178 		if (ret) {
6179 #ifdef CONFIG_TRACER_MAX_TRACE
6180 			if (t->use_max_tr)
6181 				tracing_disarm_snapshot(tr);
6182 #endif
6183 			return ret;
6184 		}
6185 	}
6186 
6187 	tr->current_trace = t;
6188 	tr->current_trace->enabled++;
6189 	trace_branch_enable(tr);
6190 
6191 	return 0;
6192 }
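/*
 * Illustrative example (not from the source): tracing_set_tracer() is the
 * path behind the current_tracer file, so switching tracers from a shell
 * looks like:
 *
 *	cat /sys/kernel/tracing/available_tracers
 *	echo function_graph > /sys/kernel/tracing/current_tracer
 *	echo nop > /sys/kernel/tracing/current_tracer
 *
 * The write fails with -EBUSY while a trace_pipe reader holds a reference,
 * matching the tr->trace_ref check above.
 */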
6193 
6194 static ssize_t
6195 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6196 			size_t cnt, loff_t *ppos)
6197 {
6198 	struct trace_array *tr = filp->private_data;
6199 	char buf[MAX_TRACER_SIZE+1];
6200 	char *name;
6201 	size_t ret;
6202 	int err;
6203 
6204 	ret = cnt;
6205 
6206 	if (cnt > MAX_TRACER_SIZE)
6207 		cnt = MAX_TRACER_SIZE;
6208 
6209 	if (copy_from_user(buf, ubuf, cnt))
6210 		return -EFAULT;
6211 
6212 	buf[cnt] = 0;
6213 
6214 	name = strim(buf);
6215 
6216 	err = tracing_set_tracer(tr, name);
6217 	if (err)
6218 		return err;
6219 
6220 	*ppos += ret;
6221 
6222 	return ret;
6223 }
6224 
6225 static ssize_t
6226 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6227 		   size_t cnt, loff_t *ppos)
6228 {
6229 	char buf[64];
6230 	int r;
6231 
6232 	r = snprintf(buf, sizeof(buf), "%ld\n",
6233 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6234 	if (r > sizeof(buf))
6235 		r = sizeof(buf);
6236 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6237 }
6238 
6239 static ssize_t
6240 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6241 		    size_t cnt, loff_t *ppos)
6242 {
6243 	unsigned long val;
6244 	int ret;
6245 
6246 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6247 	if (ret)
6248 		return ret;
6249 
6250 	*ptr = val * 1000;
6251 
6252 	return cnt;
6253 }
6254 
6255 static ssize_t
6256 tracing_thresh_read(struct file *filp, char __user *ubuf,
6257 		    size_t cnt, loff_t *ppos)
6258 {
6259 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6260 }
6261 
6262 static ssize_t
6263 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6264 		     size_t cnt, loff_t *ppos)
6265 {
6266 	struct trace_array *tr = filp->private_data;
6267 	int ret;
6268 
6269 	guard(mutex)(&trace_types_lock);
6270 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6271 	if (ret < 0)
6272 		return ret;
6273 
6274 	if (tr->current_trace->update_thresh) {
6275 		ret = tr->current_trace->update_thresh(tr);
6276 		if (ret < 0)
6277 			return ret;
6278 	}
6279 
6280 	return cnt;
6281 }
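/*
 * Example (illustrative): tracing_thresh is exposed in microseconds but
 * stored in nanoseconds (tracing_nsecs_write() multiplies by 1000), so
 *
 *	echo 100 > /sys/kernel/tracing/tracing_thresh
 *
 * asks the latency tracers to record only traces longer than 100 usecs.
 */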
6282 
6283 #ifdef CONFIG_TRACER_MAX_TRACE
6284 
6285 static ssize_t
6286 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6287 		     size_t cnt, loff_t *ppos)
6288 {
6289 	struct trace_array *tr = filp->private_data;
6290 
6291 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6292 }
6293 
6294 static ssize_t
6295 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6296 		      size_t cnt, loff_t *ppos)
6297 {
6298 	struct trace_array *tr = filp->private_data;
6299 
6300 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6301 }
6302 
6303 #endif
6304 
6305 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6306 {
6307 	if (cpu == RING_BUFFER_ALL_CPUS) {
6308 		if (cpumask_empty(tr->pipe_cpumask)) {
6309 			cpumask_setall(tr->pipe_cpumask);
6310 			return 0;
6311 		}
6312 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6313 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6314 		return 0;
6315 	}
6316 	return -EBUSY;
6317 }
6318 
6319 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6320 {
6321 	if (cpu == RING_BUFFER_ALL_CPUS) {
6322 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6323 		cpumask_clear(tr->pipe_cpumask);
6324 	} else {
6325 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6326 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6327 	}
6328 }
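/*
 * Informal summary of the two helpers above: trace_pipe readers are
 * exclusive per CPU. Opening the top-level trace_pipe claims every CPU
 * (only possible while no per-CPU pipe is open), opening
 * per_cpu/cpuN/trace_pipe claims just that CPU, and a conflicting second
 * open fails with -EBUSY.
 */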
6329 
6330 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6331 {
6332 	struct trace_array *tr = inode->i_private;
6333 	struct trace_iterator *iter;
6334 	int cpu;
6335 	int ret;
6336 
6337 	ret = tracing_check_open_get_tr(tr);
6338 	if (ret)
6339 		return ret;
6340 
6341 	mutex_lock(&trace_types_lock);
6342 	cpu = tracing_get_cpu(inode);
6343 	ret = open_pipe_on_cpu(tr, cpu);
6344 	if (ret)
6345 		goto fail_pipe_on_cpu;
6346 
6347 	/* create a buffer to store the information to pass to userspace */
6348 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6349 	if (!iter) {
6350 		ret = -ENOMEM;
6351 		goto fail_alloc_iter;
6352 	}
6353 
6354 	trace_seq_init(&iter->seq);
6355 	iter->trace = tr->current_trace;
6356 
6357 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6358 		ret = -ENOMEM;
6359 		goto fail;
6360 	}
6361 
6362 	/* trace pipe does not show start of buffer */
6363 	cpumask_setall(iter->started);
6364 
6365 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6366 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6367 
6368 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6369 	if (trace_clocks[tr->clock_id].in_ns)
6370 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6371 
6372 	iter->tr = tr;
6373 	iter->array_buffer = &tr->array_buffer;
6374 	iter->cpu_file = cpu;
6375 	mutex_init(&iter->mutex);
6376 	filp->private_data = iter;
6377 
6378 	if (iter->trace->pipe_open)
6379 		iter->trace->pipe_open(iter);
6380 
6381 	nonseekable_open(inode, filp);
6382 
6383 	tr->trace_ref++;
6384 
6385 	mutex_unlock(&trace_types_lock);
6386 	return ret;
6387 
6388 fail:
6389 	kfree(iter);
6390 fail_alloc_iter:
6391 	close_pipe_on_cpu(tr, cpu);
6392 fail_pipe_on_cpu:
6393 	__trace_array_put(tr);
6394 	mutex_unlock(&trace_types_lock);
6395 	return ret;
6396 }
6397 
6398 static int tracing_release_pipe(struct inode *inode, struct file *file)
6399 {
6400 	struct trace_iterator *iter = file->private_data;
6401 	struct trace_array *tr = inode->i_private;
6402 
6403 	mutex_lock(&trace_types_lock);
6404 
6405 	tr->trace_ref--;
6406 
6407 	if (iter->trace->pipe_close)
6408 		iter->trace->pipe_close(iter);
6409 	close_pipe_on_cpu(tr, iter->cpu_file);
6410 	mutex_unlock(&trace_types_lock);
6411 
6412 	free_trace_iter_content(iter);
6413 	kfree(iter);
6414 
6415 	trace_array_put(tr);
6416 
6417 	return 0;
6418 }
6419 
6420 static __poll_t
6421 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6422 {
6423 	struct trace_array *tr = iter->tr;
6424 
6425 	/* Iterators are static, they should be filled or empty */
6426 	if (trace_buffer_iter(iter, iter->cpu_file))
6427 		return EPOLLIN | EPOLLRDNORM;
6428 
6429 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6430 		/*
6431 		 * Always select as readable when in blocking mode
6432 		 */
6433 		return EPOLLIN | EPOLLRDNORM;
6434 	else
6435 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6436 					     filp, poll_table, iter->tr->buffer_percent);
6437 }
6438 
6439 static __poll_t
6440 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6441 {
6442 	struct trace_iterator *iter = filp->private_data;
6443 
6444 	return trace_poll(iter, filp, poll_table);
6445 }
6446 
6447 /* Must be called with iter->mutex held. */
6448 static int tracing_wait_pipe(struct file *filp)
6449 {
6450 	struct trace_iterator *iter = filp->private_data;
6451 	int ret;
6452 
6453 	while (trace_empty(iter)) {
6454 
6455 		if ((filp->f_flags & O_NONBLOCK)) {
6456 			return -EAGAIN;
6457 		}
6458 
6459 		/*
6460 		 * We block until we read something and tracing is disabled.
6461 		 * We still block if tracing is disabled, but we have never
6462 		 * read anything. This allows a user to cat this file, and
6463 		 * then enable tracing. But after we have read something,
6464 		 * we give an EOF when tracing is again disabled.
6465 		 *
6466 		 * iter->pos will be 0 if we haven't read anything.
6467 		 */
6468 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6469 			break;
6470 
6471 		mutex_unlock(&iter->mutex);
6472 
6473 		ret = wait_on_pipe(iter, 0);
6474 
6475 		mutex_lock(&iter->mutex);
6476 
6477 		if (ret)
6478 			return ret;
6479 	}
6480 
6481 	return 1;
6482 }
6483 
6484 /*
6485  * Consumer reader.
6486  */
6487 static ssize_t
6488 tracing_read_pipe(struct file *filp, char __user *ubuf,
6489 		  size_t cnt, loff_t *ppos)
6490 {
6491 	struct trace_iterator *iter = filp->private_data;
6492 	ssize_t sret;
6493 
6494 	/*
6495 	 * Avoid more than one consumer on a single file descriptor.
6496 	 * This is just a matter of trace coherency; the ring buffer itself
6497 	 * is protected.
6498 	 */
6499 	guard(mutex)(&iter->mutex);
6500 
6501 	/* return any leftover data */
6502 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6503 	if (sret != -EBUSY)
6504 		return sret;
6505 
6506 	trace_seq_init(&iter->seq);
6507 
6508 	if (iter->trace->read) {
6509 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6510 		if (sret)
6511 			return sret;
6512 	}
6513 
6514 waitagain:
6515 	sret = tracing_wait_pipe(filp);
6516 	if (sret <= 0)
6517 		return sret;
6518 
6519 	/* stop when tracing is finished */
6520 	if (trace_empty(iter))
6521 		return 0;
6522 
6523 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6524 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6525 
6526 	/* reset all but tr, trace, and overruns */
6527 	trace_iterator_reset(iter);
6528 	cpumask_clear(iter->started);
6529 	trace_seq_init(&iter->seq);
6530 
6531 	trace_event_read_lock();
6532 	trace_access_lock(iter->cpu_file);
6533 	while (trace_find_next_entry_inc(iter) != NULL) {
6534 		enum print_line_t ret;
6535 		int save_len = iter->seq.seq.len;
6536 
6537 		ret = print_trace_line(iter);
6538 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6539 			/*
6540 			 * If one print_trace_line() fills the entire trace_seq in one shot,
6541 			 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6542 			 * In this case, we need to consume it; otherwise, the loop will peek
6543 			 * at this event next time, resulting in an infinite loop.
6544 			 */
6545 			if (save_len == 0) {
6546 				iter->seq.full = 0;
6547 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6548 				trace_consume(iter);
6549 				break;
6550 			}
6551 
6552 			/* In other cases, don't print partial lines */
6553 			iter->seq.seq.len = save_len;
6554 			break;
6555 		}
6556 		if (ret != TRACE_TYPE_NO_CONSUME)
6557 			trace_consume(iter);
6558 
6559 		if (trace_seq_used(&iter->seq) >= cnt)
6560 			break;
6561 
6562 		/*
6563 		 * Setting the full flag means we reached the trace_seq buffer
6564 		 * size and we should leave by partial output condition above.
6565 		 * One of the trace_seq_* functions is not used properly.
6566 		 */
6567 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6568 			  iter->ent->type);
6569 	}
6570 	trace_access_unlock(iter->cpu_file);
6571 	trace_event_read_unlock();
6572 
6573 	/* Now copy what we have to the user */
6574 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6575 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6576 		trace_seq_init(&iter->seq);
6577 
6578 	/*
6579 	 * If there was nothing to send to user, in spite of consuming trace
6580 	 * entries, go back to wait for more entries.
6581 	 */
6582 	if (sret == -EBUSY)
6583 		goto waitagain;
6584 
6585 	return sret;
6586 }
6587 
6588 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6589 				     unsigned int idx)
6590 {
6591 	__free_page(spd->pages[idx]);
6592 }
6593 
6594 static size_t
6595 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6596 {
6597 	size_t count;
6598 	int save_len;
6599 	int ret;
6600 
6601 	/* Seq buffer is page-sized, exactly what we need. */
6602 	for (;;) {
6603 		save_len = iter->seq.seq.len;
6604 		ret = print_trace_line(iter);
6605 
6606 		if (trace_seq_has_overflowed(&iter->seq)) {
6607 			iter->seq.seq.len = save_len;
6608 			break;
6609 		}
6610 
6611 		/*
6612 		 * This should not be hit, because it should only
6613 		 * be set if the iter->seq overflowed. But check it
6614 		 * anyway to be safe.
6615 		 */
6616 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6617 			iter->seq.seq.len = save_len;
6618 			break;
6619 		}
6620 
6621 		count = trace_seq_used(&iter->seq) - save_len;
6622 		if (rem < count) {
6623 			rem = 0;
6624 			iter->seq.seq.len = save_len;
6625 			break;
6626 		}
6627 
6628 		if (ret != TRACE_TYPE_NO_CONSUME)
6629 			trace_consume(iter);
6630 		rem -= count;
6631 		if (!trace_find_next_entry_inc(iter))	{
6632 			rem = 0;
6633 			iter->ent = NULL;
6634 			break;
6635 		}
6636 	}
6637 
6638 	return rem;
6639 }
6640 
6641 static ssize_t tracing_splice_read_pipe(struct file *filp,
6642 					loff_t *ppos,
6643 					struct pipe_inode_info *pipe,
6644 					size_t len,
6645 					unsigned int flags)
6646 {
6647 	struct page *pages_def[PIPE_DEF_BUFFERS];
6648 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6649 	struct trace_iterator *iter = filp->private_data;
6650 	struct splice_pipe_desc spd = {
6651 		.pages		= pages_def,
6652 		.partial	= partial_def,
6653 		.nr_pages	= 0, /* This gets updated below. */
6654 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6655 		.ops		= &default_pipe_buf_ops,
6656 		.spd_release	= tracing_spd_release_pipe,
6657 	};
6658 	ssize_t ret;
6659 	size_t rem;
6660 	unsigned int i;
6661 
6662 	if (splice_grow_spd(pipe, &spd))
6663 		return -ENOMEM;
6664 
6665 	mutex_lock(&iter->mutex);
6666 
6667 	if (iter->trace->splice_read) {
6668 		ret = iter->trace->splice_read(iter, filp,
6669 					       ppos, pipe, len, flags);
6670 		if (ret)
6671 			goto out_err;
6672 	}
6673 
6674 	ret = tracing_wait_pipe(filp);
6675 	if (ret <= 0)
6676 		goto out_err;
6677 
6678 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6679 		ret = -EFAULT;
6680 		goto out_err;
6681 	}
6682 
6683 	trace_event_read_lock();
6684 	trace_access_lock(iter->cpu_file);
6685 
6686 	/* Fill as many pages as possible. */
6687 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6688 		spd.pages[i] = alloc_page(GFP_KERNEL);
6689 		if (!spd.pages[i])
6690 			break;
6691 
6692 		rem = tracing_fill_pipe_page(rem, iter);
6693 
6694 		/* Copy the data into the page, so we can start over. */
6695 		ret = trace_seq_to_buffer(&iter->seq,
6696 					  page_address(spd.pages[i]),
6697 					  min((size_t)trace_seq_used(&iter->seq),
6698 						  (size_t)PAGE_SIZE));
6699 		if (ret < 0) {
6700 			__free_page(spd.pages[i]);
6701 			break;
6702 		}
6703 		spd.partial[i].offset = 0;
6704 		spd.partial[i].len = ret;
6705 
6706 		trace_seq_init(&iter->seq);
6707 	}
6708 
6709 	trace_access_unlock(iter->cpu_file);
6710 	trace_event_read_unlock();
6711 	mutex_unlock(&iter->mutex);
6712 
6713 	spd.nr_pages = i;
6714 
6715 	if (i)
6716 		ret = splice_to_pipe(pipe, &spd);
6717 	else
6718 		ret = 0;
6719 out:
6720 	splice_shrink_spd(&spd);
6721 	return ret;
6722 
6723 out_err:
6724 	mutex_unlock(&iter->mutex);
6725 	goto out;
6726 }
6727 
6728 static ssize_t
6729 tracing_entries_read(struct file *filp, char __user *ubuf,
6730 		     size_t cnt, loff_t *ppos)
6731 {
6732 	struct inode *inode = file_inode(filp);
6733 	struct trace_array *tr = inode->i_private;
6734 	int cpu = tracing_get_cpu(inode);
6735 	char buf[64];
6736 	int r = 0;
6737 	ssize_t ret;
6738 
6739 	mutex_lock(&trace_types_lock);
6740 
6741 	if (cpu == RING_BUFFER_ALL_CPUS) {
6742 		int cpu, buf_size_same;
6743 		unsigned long size;
6744 
6745 		size = 0;
6746 		buf_size_same = 1;
6747 		/* check if all cpu sizes are same */
6748 		for_each_tracing_cpu(cpu) {
6749 			/* fill in the size from first enabled cpu */
6750 			if (size == 0)
6751 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6752 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6753 				buf_size_same = 0;
6754 				break;
6755 			}
6756 		}
6757 
6758 		if (buf_size_same) {
6759 			if (!tr->ring_buffer_expanded)
6760 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6761 					    size >> 10,
6762 					    trace_buf_size >> 10);
6763 			else
6764 				r = sprintf(buf, "%lu\n", size >> 10);
6765 		} else
6766 			r = sprintf(buf, "X\n");
6767 	} else
6768 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6769 
6770 	mutex_unlock(&trace_types_lock);
6771 
6772 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6773 	return ret;
6774 }
6775 
6776 static ssize_t
6777 tracing_entries_write(struct file *filp, const char __user *ubuf,
6778 		      size_t cnt, loff_t *ppos)
6779 {
6780 	struct inode *inode = file_inode(filp);
6781 	struct trace_array *tr = inode->i_private;
6782 	unsigned long val;
6783 	int ret;
6784 
6785 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6786 	if (ret)
6787 		return ret;
6788 
6789 	/* must have at least 1 entry */
6790 	if (!val)
6791 		return -EINVAL;
6792 
6793 	/* value is in KB */
6794 	val <<= 10;
6795 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6796 	if (ret < 0)
6797 		return ret;
6798 
6799 	*ppos += cnt;
6800 
6801 	return cnt;
6802 }
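/*
 * Illustrative usage: this handler backs the buffer_size_kb files, and the
 * value written is taken in KB (val <<= 10 above), e.g.:
 *
 *	echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *	echo 1024 > /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 *
 * The first resizes every CPU's buffer, the second only cpu0's.
 */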
6803 
6804 static ssize_t
6805 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6806 				size_t cnt, loff_t *ppos)
6807 {
6808 	struct trace_array *tr = filp->private_data;
6809 	char buf[64];
6810 	int r, cpu;
6811 	unsigned long size = 0, expanded_size = 0;
6812 
6813 	mutex_lock(&trace_types_lock);
6814 	for_each_tracing_cpu(cpu) {
6815 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6816 		if (!tr->ring_buffer_expanded)
6817 			expanded_size += trace_buf_size >> 10;
6818 	}
6819 	if (tr->ring_buffer_expanded)
6820 		r = sprintf(buf, "%lu\n", size);
6821 	else
6822 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6823 	mutex_unlock(&trace_types_lock);
6824 
6825 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6826 }
6827 
6828 static ssize_t
6829 tracing_last_boot_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
6830 {
6831 	struct trace_array *tr = filp->private_data;
6832 	struct seq_buf seq;
6833 	char buf[64];
6834 
6835 	seq_buf_init(&seq, buf, 64);
6836 
6837 	seq_buf_printf(&seq, "text delta:\t%ld\n", tr->text_delta);
6838 	seq_buf_printf(&seq, "data delta:\t%ld\n", tr->data_delta);
6839 
6840 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, seq_buf_used(&seq));
6841 }
6842 
6843 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
6844 {
6845 	struct trace_array *tr = inode->i_private;
6846 	int cpu = tracing_get_cpu(inode);
6847 	int ret;
6848 
6849 	ret = tracing_check_open_get_tr(tr);
6850 	if (ret)
6851 		return ret;
6852 
6853 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
6854 	if (ret < 0)
6855 		__trace_array_put(tr);
6856 	return ret;
6857 }
6858 
6859 static ssize_t
6860 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6861 			  size_t cnt, loff_t *ppos)
6862 {
6863 	/*
6864 	 * There is no need to read what the user has written; this function
6865 	 * exists only so that writing to the file (e.g. via "echo") succeeds.
6866 	 */
6867 
6868 	*ppos += cnt;
6869 
6870 	return cnt;
6871 }
6872 
6873 static int
6874 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6875 {
6876 	struct trace_array *tr = inode->i_private;
6877 
6878 	/* disable tracing ? */
6879 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6880 		tracer_tracing_off(tr);
6881 	/* resize the ring buffer to 0 */
6882 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6883 
6884 	trace_array_put(tr);
6885 
6886 	return 0;
6887 }
6888 
6889 #define TRACE_MARKER_MAX_SIZE		4096
6890 
6891 static ssize_t
6892 tracing_mark_write(struct file *filp, const char __user *ubuf,
6893 					size_t cnt, loff_t *fpos)
6894 {
6895 	struct trace_array *tr = filp->private_data;
6896 	struct ring_buffer_event *event;
6897 	enum event_trigger_type tt = ETT_NONE;
6898 	struct trace_buffer *buffer;
6899 	struct print_entry *entry;
6900 	int meta_size;
6901 	ssize_t written;
6902 	size_t size;
6903 	int len;
6904 
6905 /* Used in tracing_mark_raw_write() as well */
6906 #define FAULTED_STR "<faulted>"
6907 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6908 
6909 	if (tracing_disabled)
6910 		return -EINVAL;
6911 
6912 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6913 		return -EINVAL;
6914 
6915 	if ((ssize_t)cnt < 0)
6916 		return -EINVAL;
6917 
6918 	if (cnt > TRACE_MARKER_MAX_SIZE)
6919 		cnt = TRACE_MARKER_MAX_SIZE;
6920 
6921 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
6922  again:
6923 	size = cnt + meta_size;
6924 
6925 	/* If less than "<faulted>", then make sure we can still add that */
6926 	if (cnt < FAULTED_SIZE)
6927 		size += FAULTED_SIZE - cnt;
6928 
6929 	buffer = tr->array_buffer.buffer;
6930 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6931 					    tracing_gen_ctx());
6932 	if (unlikely(!event)) {
6933 		/*
6934 		 * If the size was greater than what was allowed, then
6935 		 * make it smaller and try again.
6936 		 */
6937 		if (size > ring_buffer_max_event_size(buffer)) {
6938 			/* A size padded for cnt < FAULTED_SIZE should never exceed the max */
6939 			if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
6940 				return -EBADF;
6941 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
6942 			/* The above should only happen once */
6943 			if (WARN_ON_ONCE(cnt + meta_size == size))
6944 				return -EBADF;
6945 			goto again;
6946 		}
6947 
6948 		/* Ring buffer disabled, return as if not open for write */
6949 		return -EBADF;
6950 	}
6951 
6952 	entry = ring_buffer_event_data(event);
6953 	entry->ip = _THIS_IP_;
6954 
6955 	len = copy_from_user_nofault(&entry->buf, ubuf, cnt);
6956 	if (len) {
6957 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6958 		cnt = FAULTED_SIZE;
6959 		written = -EFAULT;
6960 	} else
6961 		written = cnt;
6962 
6963 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6964 		/* do not add \n before testing triggers, but add \0 */
6965 		entry->buf[cnt] = '\0';
6966 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
6967 	}
6968 
6969 	if (entry->buf[cnt - 1] != '\n') {
6970 		entry->buf[cnt] = '\n';
6971 		entry->buf[cnt + 1] = '\0';
6972 	} else
6973 		entry->buf[cnt] = '\0';
6974 
6975 	if (static_branch_unlikely(&trace_marker_exports_enabled))
6976 		ftrace_exports(event, TRACE_EXPORT_MARKER);
6977 	__buffer_unlock_commit(buffer, event);
6978 
6979 	if (tt)
6980 		event_triggers_post_call(tr->trace_marker_file, tt);
6981 
6982 	return written;
6983 }
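
/*
 * Illustrative sketch, not part of this file's build: writing a message
 * into the ring buffer through the trace_marker file serviced by
 * tracing_mark_write() above.  A trailing newline is optional; the write
 * path appends one when it is missing.  The tracefs path and helper name
 * are assumptions.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static void mark_checkpoint(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *		if (fd >= 0) {
 *			write(fd, "hit checkpoint A", 16);
 *			close(fd);
 *		}
 *	}
 */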
6984 
6985 static ssize_t
6986 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6987 					size_t cnt, loff_t *fpos)
6988 {
6989 	struct trace_array *tr = filp->private_data;
6990 	struct ring_buffer_event *event;
6991 	struct trace_buffer *buffer;
6992 	struct raw_data_entry *entry;
6993 	ssize_t written;
6994 	int size;
6995 	int len;
6996 
6997 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6998 
6999 	if (tracing_disabled)
7000 		return -EINVAL;
7001 
7002 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7003 		return -EINVAL;
7004 
7005 	/* The marker must at least have a tag id */
7006 	if (cnt < sizeof(unsigned int))
7007 		return -EINVAL;
7008 
7009 	size = sizeof(*entry) + cnt;
7010 	if (cnt < FAULT_SIZE_ID)
7011 		size += FAULT_SIZE_ID - cnt;
7012 
7013 	buffer = tr->array_buffer.buffer;
7014 
7015 	if (size > ring_buffer_max_event_size(buffer))
7016 		return -EINVAL;
7017 
7018 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7019 					    tracing_gen_ctx());
7020 	if (!event)
7021 		/* Ring buffer disabled, return as if not open for write */
7022 		return -EBADF;
7023 
7024 	entry = ring_buffer_event_data(event);
7025 
7026 	len = copy_from_user_nofault(&entry->id, ubuf, cnt);
7027 	if (len) {
7028 		entry->id = -1;
7029 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7030 		written = -EFAULT;
7031 	} else
7032 		written = cnt;
7033 
7034 	__buffer_unlock_commit(buffer, event);
7035 
7036 	return written;
7037 }
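
/*
 * Illustrative sketch, not part of this file's build: the trace_marker_raw
 * file handled by tracing_mark_raw_write() above expects the payload to
 * begin with an unsigned int tag id followed by arbitrary binary data.
 * The struct layout, helper name and raw_fd descriptor are invented for
 * the example.
 *
 *	#include <unistd.h>
 *
 *	static void send_raw_marker(int raw_fd)
 *	{
 *		struct {
 *			unsigned int id;
 *			char payload[8];
 *		} raw = { .id = 42, .payload = "rawdata" };
 *
 *		write(raw_fd, &raw, sizeof(raw));
 *	}
 */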
7038 
7039 static int tracing_clock_show(struct seq_file *m, void *v)
7040 {
7041 	struct trace_array *tr = m->private;
7042 	int i;
7043 
7044 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7045 		seq_printf(m,
7046 			"%s%s%s%s", i ? " " : "",
7047 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7048 			i == tr->clock_id ? "]" : "");
7049 	seq_putc(m, '\n');
7050 
7051 	return 0;
7052 }
7053 
7054 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7055 {
7056 	int i;
7057 
7058 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7059 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7060 			break;
7061 	}
7062 	if (i == ARRAY_SIZE(trace_clocks))
7063 		return -EINVAL;
7064 
7065 	mutex_lock(&trace_types_lock);
7066 
7067 	tr->clock_id = i;
7068 
7069 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7070 
7071 	/*
7072 	 * New clock may not be consistent with the previous clock.
7073 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7074 	 */
7075 	tracing_reset_online_cpus(&tr->array_buffer);
7076 
7077 #ifdef CONFIG_TRACER_MAX_TRACE
7078 	if (tr->max_buffer.buffer)
7079 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7080 	tracing_reset_online_cpus(&tr->max_buffer);
7081 #endif
7082 
7083 	mutex_unlock(&trace_types_lock);
7084 
7085 	return 0;
7086 }
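
/*
 * Illustrative sketch, not part of this file's build: selecting a trace
 * clock from user space.  Reading trace_clock lists the available names
 * with the current one in brackets; writing a name ends up in
 * tracing_set_clock() above and resets the buffers.  The tracefs path and
 * helper name are assumptions.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static void use_mono_clock(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *
 *		if (fd >= 0) {
 *			write(fd, "mono", 4);
 *			close(fd);
 *		}
 *	}
 */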
7087 
7088 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7089 				   size_t cnt, loff_t *fpos)
7090 {
7091 	struct seq_file *m = filp->private_data;
7092 	struct trace_array *tr = m->private;
7093 	char buf[64];
7094 	const char *clockstr;
7095 	int ret;
7096 
7097 	if (cnt >= sizeof(buf))
7098 		return -EINVAL;
7099 
7100 	if (copy_from_user(buf, ubuf, cnt))
7101 		return -EFAULT;
7102 
7103 	buf[cnt] = 0;
7104 
7105 	clockstr = strstrip(buf);
7106 
7107 	ret = tracing_set_clock(tr, clockstr);
7108 	if (ret)
7109 		return ret;
7110 
7111 	*fpos += cnt;
7112 
7113 	return cnt;
7114 }
7115 
7116 static int tracing_clock_open(struct inode *inode, struct file *file)
7117 {
7118 	struct trace_array *tr = inode->i_private;
7119 	int ret;
7120 
7121 	ret = tracing_check_open_get_tr(tr);
7122 	if (ret)
7123 		return ret;
7124 
7125 	ret = single_open(file, tracing_clock_show, inode->i_private);
7126 	if (ret < 0)
7127 		trace_array_put(tr);
7128 
7129 	return ret;
7130 }
7131 
7132 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7133 {
7134 	struct trace_array *tr = m->private;
7135 
7136 	mutex_lock(&trace_types_lock);
7137 
7138 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7139 		seq_puts(m, "delta [absolute]\n");
7140 	else
7141 		seq_puts(m, "[delta] absolute\n");
7142 
7143 	mutex_unlock(&trace_types_lock);
7144 
7145 	return 0;
7146 }
7147 
7148 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7149 {
7150 	struct trace_array *tr = inode->i_private;
7151 	int ret;
7152 
7153 	ret = tracing_check_open_get_tr(tr);
7154 	if (ret)
7155 		return ret;
7156 
7157 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7158 	if (ret < 0)
7159 		trace_array_put(tr);
7160 
7161 	return ret;
7162 }
7163 
7164 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7165 {
7166 	if (rbe == this_cpu_read(trace_buffered_event))
7167 		return ring_buffer_time_stamp(buffer);
7168 
7169 	return ring_buffer_event_time_stamp(buffer, rbe);
7170 }
7171 
7172 /*
7173  * Set or disable using the per CPU trace_buffered_event when possible.
7174  */
7175 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7176 {
7177 	guard(mutex)(&trace_types_lock);
7178 
7179 	if (set && tr->no_filter_buffering_ref++)
7180 		return 0;
7181 
7182 	if (!set) {
7183 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7184 			return -EINVAL;
7185 
7186 		--tr->no_filter_buffering_ref;
7187 	}
7188 
7189 	return 0;
7190 }
7191 
7192 struct ftrace_buffer_info {
7193 	struct trace_iterator	iter;
7194 	void			*spare;
7195 	unsigned int		spare_cpu;
7196 	unsigned int		spare_size;
7197 	unsigned int		read;
7198 };
7199 
7200 #ifdef CONFIG_TRACER_SNAPSHOT
7201 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7202 {
7203 	struct trace_array *tr = inode->i_private;
7204 	struct trace_iterator *iter;
7205 	struct seq_file *m;
7206 	int ret;
7207 
7208 	ret = tracing_check_open_get_tr(tr);
7209 	if (ret)
7210 		return ret;
7211 
7212 	if (file->f_mode & FMODE_READ) {
7213 		iter = __tracing_open(inode, file, true);
7214 		if (IS_ERR(iter))
7215 			ret = PTR_ERR(iter);
7216 	} else {
7217 		/* Writes still need the seq_file to hold the private data */
7218 		ret = -ENOMEM;
7219 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7220 		if (!m)
7221 			goto out;
7222 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7223 		if (!iter) {
7224 			kfree(m);
7225 			goto out;
7226 		}
7227 		ret = 0;
7228 
7229 		iter->tr = tr;
7230 		iter->array_buffer = &tr->max_buffer;
7231 		iter->cpu_file = tracing_get_cpu(inode);
7232 		m->private = iter;
7233 		file->private_data = m;
7234 	}
7235 out:
7236 	if (ret < 0)
7237 		trace_array_put(tr);
7238 
7239 	return ret;
7240 }
7241 
7242 static void tracing_swap_cpu_buffer(void *tr)
7243 {
7244 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7245 }
7246 
7247 static ssize_t
7248 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7249 		       loff_t *ppos)
7250 {
7251 	struct seq_file *m = filp->private_data;
7252 	struct trace_iterator *iter = m->private;
7253 	struct trace_array *tr = iter->tr;
7254 	unsigned long val;
7255 	int ret;
7256 
7257 	ret = tracing_update_buffers(tr);
7258 	if (ret < 0)
7259 		return ret;
7260 
7261 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7262 	if (ret)
7263 		return ret;
7264 
7265 	guard(mutex)(&trace_types_lock);
7266 
7267 	if (tr->current_trace->use_max_tr)
7268 		return -EBUSY;
7269 
7270 	local_irq_disable();
7271 	arch_spin_lock(&tr->max_lock);
7272 	if (tr->cond_snapshot)
7273 		ret = -EBUSY;
7274 	arch_spin_unlock(&tr->max_lock);
7275 	local_irq_enable();
7276 	if (ret)
7277 		return ret;
7278 
7279 	switch (val) {
7280 	case 0:
7281 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7282 			return -EINVAL;
7283 		if (tr->allocated_snapshot)
7284 			free_snapshot(tr);
7285 		break;
7286 	case 1:
7287 /* Only allow per-cpu swap if the ring buffer supports it */
7288 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7289 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7290 			return -EINVAL;
7291 #endif
7292 		if (tr->allocated_snapshot)
7293 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7294 					&tr->array_buffer, iter->cpu_file);
7295 
7296 		ret = tracing_arm_snapshot_locked(tr);
7297 		if (ret)
7298 			return ret;
7299 
7300 		/* Now, we're going to swap */
7301 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7302 			local_irq_disable();
7303 			update_max_tr(tr, current, smp_processor_id(), NULL);
7304 			local_irq_enable();
7305 		} else {
7306 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7307 						 (void *)tr, 1);
7308 		}
7309 		tracing_disarm_snapshot(tr);
7310 		break;
7311 	default:
7312 		if (tr->allocated_snapshot) {
7313 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7314 				tracing_reset_online_cpus(&tr->max_buffer);
7315 			else
7316 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7317 		}
7318 		break;
7319 	}
7320 
7321 	if (ret >= 0) {
7322 		*ppos += cnt;
7323 		ret = cnt;
7324 	}
7325 
7326 	return ret;
7327 }
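
/*
 * Illustrative sketch, not part of this file's build: the snapshot file
 * semantics implemented by tracing_snapshot_write() above.  Writing "1"
 * allocates the max buffer if needed and swaps it with the live buffer,
 * "0" frees it, and any other number only clears the snapshot contents.
 * The tracefs path and helper name are assumptions.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static void take_snapshot(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *		if (fd >= 0) {
 *			write(fd, "1", 1);	// take a snapshot now
 *			close(fd);
 *		}
 *	}
 */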
7328 
7329 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7330 {
7331 	struct seq_file *m = file->private_data;
7332 	int ret;
7333 
7334 	ret = tracing_release(inode, file);
7335 
7336 	if (file->f_mode & FMODE_READ)
7337 		return ret;
7338 
7339 	/* If write only, the seq_file is just a stub */
7340 	if (m)
7341 		kfree(m->private);
7342 	kfree(m);
7343 
7344 	return 0;
7345 }
7346 
7347 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7348 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7349 				    size_t count, loff_t *ppos);
7350 static int tracing_buffers_release(struct inode *inode, struct file *file);
7351 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7352 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7353 
7354 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7355 {
7356 	struct ftrace_buffer_info *info;
7357 	int ret;
7358 
7359 	/* The following checks for tracefs lockdown */
7360 	ret = tracing_buffers_open(inode, filp);
7361 	if (ret < 0)
7362 		return ret;
7363 
7364 	info = filp->private_data;
7365 
7366 	if (info->iter.trace->use_max_tr) {
7367 		tracing_buffers_release(inode, filp);
7368 		return -EBUSY;
7369 	}
7370 
7371 	info->iter.snapshot = true;
7372 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7373 
7374 	return ret;
7375 }
7376 
7377 #endif /* CONFIG_TRACER_SNAPSHOT */
7378 
7379 
7380 static const struct file_operations tracing_thresh_fops = {
7381 	.open		= tracing_open_generic,
7382 	.read		= tracing_thresh_read,
7383 	.write		= tracing_thresh_write,
7384 	.llseek		= generic_file_llseek,
7385 };
7386 
7387 #ifdef CONFIG_TRACER_MAX_TRACE
7388 static const struct file_operations tracing_max_lat_fops = {
7389 	.open		= tracing_open_generic_tr,
7390 	.read		= tracing_max_lat_read,
7391 	.write		= tracing_max_lat_write,
7392 	.llseek		= generic_file_llseek,
7393 	.release	= tracing_release_generic_tr,
7394 };
7395 #endif
7396 
7397 static const struct file_operations set_tracer_fops = {
7398 	.open		= tracing_open_generic_tr,
7399 	.read		= tracing_set_trace_read,
7400 	.write		= tracing_set_trace_write,
7401 	.llseek		= generic_file_llseek,
7402 	.release	= tracing_release_generic_tr,
7403 };
7404 
7405 static const struct file_operations tracing_pipe_fops = {
7406 	.open		= tracing_open_pipe,
7407 	.poll		= tracing_poll_pipe,
7408 	.read		= tracing_read_pipe,
7409 	.splice_read	= tracing_splice_read_pipe,
7410 	.release	= tracing_release_pipe,
7411 };
7412 
7413 static const struct file_operations tracing_entries_fops = {
7414 	.open		= tracing_open_generic_tr,
7415 	.read		= tracing_entries_read,
7416 	.write		= tracing_entries_write,
7417 	.llseek		= generic_file_llseek,
7418 	.release	= tracing_release_generic_tr,
7419 };
7420 
7421 static const struct file_operations tracing_buffer_meta_fops = {
7422 	.open		= tracing_buffer_meta_open,
7423 	.read		= seq_read,
7424 	.llseek		= seq_lseek,
7425 	.release	= tracing_seq_release,
7426 };
7427 
7428 static const struct file_operations tracing_total_entries_fops = {
7429 	.open		= tracing_open_generic_tr,
7430 	.read		= tracing_total_entries_read,
7431 	.llseek		= generic_file_llseek,
7432 	.release	= tracing_release_generic_tr,
7433 };
7434 
7435 static const struct file_operations tracing_free_buffer_fops = {
7436 	.open		= tracing_open_generic_tr,
7437 	.write		= tracing_free_buffer_write,
7438 	.release	= tracing_free_buffer_release,
7439 };
7440 
7441 static const struct file_operations tracing_mark_fops = {
7442 	.open		= tracing_mark_open,
7443 	.write		= tracing_mark_write,
7444 	.release	= tracing_release_generic_tr,
7445 };
7446 
7447 static const struct file_operations tracing_mark_raw_fops = {
7448 	.open		= tracing_mark_open,
7449 	.write		= tracing_mark_raw_write,
7450 	.release	= tracing_release_generic_tr,
7451 };
7452 
7453 static const struct file_operations trace_clock_fops = {
7454 	.open		= tracing_clock_open,
7455 	.read		= seq_read,
7456 	.llseek		= seq_lseek,
7457 	.release	= tracing_single_release_tr,
7458 	.write		= tracing_clock_write,
7459 };
7460 
7461 static const struct file_operations trace_time_stamp_mode_fops = {
7462 	.open		= tracing_time_stamp_mode_open,
7463 	.read		= seq_read,
7464 	.llseek		= seq_lseek,
7465 	.release	= tracing_single_release_tr,
7466 };
7467 
7468 static const struct file_operations last_boot_fops = {
7469 	.open		= tracing_open_generic_tr,
7470 	.read		= tracing_last_boot_read,
7471 	.llseek		= generic_file_llseek,
7472 	.release	= tracing_release_generic_tr,
7473 };
7474 
7475 #ifdef CONFIG_TRACER_SNAPSHOT
7476 static const struct file_operations snapshot_fops = {
7477 	.open		= tracing_snapshot_open,
7478 	.read		= seq_read,
7479 	.write		= tracing_snapshot_write,
7480 	.llseek		= tracing_lseek,
7481 	.release	= tracing_snapshot_release,
7482 };
7483 
7484 static const struct file_operations snapshot_raw_fops = {
7485 	.open		= snapshot_raw_open,
7486 	.read		= tracing_buffers_read,
7487 	.release	= tracing_buffers_release,
7488 	.splice_read	= tracing_buffers_splice_read,
7489 };
7490 
7491 #endif /* CONFIG_TRACER_SNAPSHOT */
7492 
7493 /*
7494  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7495  * @filp: The active open file structure
7496  * @ubuf: The userspace provided buffer holding the value to write
7497  * @cnt: The maximum number of bytes to read
7498  * @ppos: The current "file" position
7499  *
7500  * This function implements the write interface for a struct trace_min_max_param.
7501  * The filp->private_data must point to a trace_min_max_param structure that
7502  * defines where to write the value, the min and the max acceptable values,
7503  * and a lock to protect the write.
7504  */
7505 static ssize_t
7506 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7507 {
7508 	struct trace_min_max_param *param = filp->private_data;
7509 	u64 val;
7510 	int err;
7511 
7512 	if (!param)
7513 		return -EFAULT;
7514 
7515 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7516 	if (err)
7517 		return err;
7518 
7519 	if (param->lock)
7520 		mutex_lock(param->lock);
7521 
7522 	if (param->min && val < *param->min)
7523 		err = -EINVAL;
7524 
7525 	if (param->max && val > *param->max)
7526 		err = -EINVAL;
7527 
7528 	if (!err)
7529 		*param->val = val;
7530 
7531 	if (param->lock)
7532 		mutex_unlock(param->lock);
7533 
7534 	if (err)
7535 		return err;
7536 
7537 	return cnt;
7538 }
7539 
7540 /*
7541  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7542  * @filp: The active open file structure
7543  * @ubuf: The userspace provided buffer to read value into
7544  * @cnt: The maximum number of bytes to read
7545  * @ppos: The current "file" position
7546  *
7547  * This function implements the read interface for a struct trace_min_max_param.
7548  * The filp->private_data must point to a trace_min_max_param struct with valid
7549  * data.
7550  */
7551 static ssize_t
7552 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7553 {
7554 	struct trace_min_max_param *param = filp->private_data;
7555 	char buf[U64_STR_SIZE];
7556 	int len;
7557 	u64 val;
7558 
7559 	if (!param)
7560 		return -EFAULT;
7561 
7562 	val = *param->val;
7563 
7564 	if (cnt > sizeof(buf))
7565 		cnt = sizeof(buf);
7566 
7567 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7568 
7569 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7570 }
7571 
7572 const struct file_operations trace_min_max_fops = {
7573 	.open		= tracing_open_generic,
7574 	.read		= trace_min_max_read,
7575 	.write		= trace_min_max_write,
7576 };
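
/*
 * Illustrative sketch, not wired up anywhere: exposing a u64 tunable
 * through trace_min_max_fops.  The variable and file names are invented
 * for the example; only the trace_min_max_param fields used by the
 * handlers above are relied upon.
 *
 *	static u64 example_val = 50;
 *	static u64 example_min = 1;
 *	static u64 example_max = 100;
 *
 *	static struct trace_min_max_param example_param = {
 *		.lock	= &trace_types_lock,
 *		.val	= &example_val,
 *		.min	= &example_min,
 *		.max	= &example_max,
 *	};
 *
 *	// from an init path:
 *	// trace_create_file("example_knob", TRACE_MODE_WRITE, d_tracer,
 *	//		     &example_param, &trace_min_max_fops);
 */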
7577 
7578 #define TRACING_LOG_ERRS_MAX	8
7579 #define TRACING_LOG_LOC_MAX	128
7580 
7581 #define CMD_PREFIX "  Command: "
7582 
7583 struct err_info {
7584 	const char	**errs;	/* ptr to loc-specific array of err strings */
7585 	u8		type;	/* index into errs -> specific err string */
7586 	u16		pos;	/* caret position */
7587 	u64		ts;
7588 };
7589 
7590 struct tracing_log_err {
7591 	struct list_head	list;
7592 	struct err_info		info;
7593 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7594 	char			*cmd;                     /* what caused err */
7595 };
7596 
7597 static DEFINE_MUTEX(tracing_err_log_lock);
7598 
7599 static struct tracing_log_err *alloc_tracing_log_err(int len)
7600 {
7601 	struct tracing_log_err *err;
7602 
7603 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7604 	if (!err)
7605 		return ERR_PTR(-ENOMEM);
7606 
7607 	err->cmd = kzalloc(len, GFP_KERNEL);
7608 	if (!err->cmd) {
7609 		kfree(err);
7610 		return ERR_PTR(-ENOMEM);
7611 	}
7612 
7613 	return err;
7614 }
7615 
7616 static void free_tracing_log_err(struct tracing_log_err *err)
7617 {
7618 	kfree(err->cmd);
7619 	kfree(err);
7620 }
7621 
7622 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7623 						   int len)
7624 {
7625 	struct tracing_log_err *err;
7626 	char *cmd;
7627 
7628 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7629 		err = alloc_tracing_log_err(len);
7630 		if (PTR_ERR(err) != -ENOMEM)
7631 			tr->n_err_log_entries++;
7632 
7633 		return err;
7634 	}
7635 	cmd = kzalloc(len, GFP_KERNEL);
7636 	if (!cmd)
7637 		return ERR_PTR(-ENOMEM);
7638 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7639 	kfree(err->cmd);
7640 	err->cmd = cmd;
7641 	list_del(&err->list);
7642 
7643 	return err;
7644 }
7645 
7646 /**
7647  * err_pos - find the position of a string within a command for error careting
7648  * @cmd: The tracing command that caused the error
7649  * @str: The string to position the caret at within @cmd
7650  *
7651  * Finds the position of the first occurrence of @str within @cmd.  The
7652  * return value can be passed to tracing_log_err() for caret placement
7653  * within @cmd.
7654  *
7655  * Returns the index within @cmd of the first occurrence of @str or 0
7656  * if @str was not found.
7657  */
7658 unsigned int err_pos(char *cmd, const char *str)
7659 {
7660 	char *found;
7661 
7662 	if (WARN_ON(!strlen(cmd)))
7663 		return 0;
7664 
7665 	found = strstr(cmd, str);
7666 	if (found)
7667 		return found - cmd;
7668 
7669 	return 0;
7670 }
7671 
7672 /**
7673  * tracing_log_err - write an error to the tracing error log
7674  * @tr: The associated trace array for the error (NULL for top level array)
7675  * @loc: A string describing where the error occurred
7676  * @cmd: The tracing command that caused the error
7677  * @errs: The array of loc-specific static error strings
7678  * @type: The index into errs[], which produces the specific static err string
7679  * @pos: The position the caret should be placed in the cmd
7680  *
7681  * Writes an error into tracing/error_log of the form:
7682  *
7683  * <loc>: error: <text>
7684  *   Command: <cmd>
7685  *              ^
7686  *
7687  * tracing/error_log is a small log file containing the last
7688  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7689  * unless there has been a tracing error, and the error log can be
7690  * cleared and have its memory freed by writing the empty string in
7691  * truncation mode to it i.e. echo > tracing/error_log.
7692  *
7693  * NOTE: the @errs array along with the @type param are used to
7694  * produce a static error string - this string is not copied and saved
7695  * when the error is logged - only a pointer to it is saved.  See
7696  * existing callers for examples of how static strings are typically
7697  * defined for use with tracing_log_err().
7698  */
7699 void tracing_log_err(struct trace_array *tr,
7700 		     const char *loc, const char *cmd,
7701 		     const char **errs, u8 type, u16 pos)
7702 {
7703 	struct tracing_log_err *err;
7704 	int len = 0;
7705 
7706 	if (!tr)
7707 		tr = &global_trace;
7708 
7709 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7710 
7711 	guard(mutex)(&tracing_err_log_lock);
7712 
7713 	err = get_tracing_log_err(tr, len);
7714 	if (PTR_ERR(err) == -ENOMEM)
7715 		return;
7716 
7717 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7718 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7719 
7720 	err->info.errs = errs;
7721 	err->info.type = type;
7722 	err->info.pos = pos;
7723 	err->info.ts = local_clock();
7724 
7725 	list_add_tail(&err->list, &tr->err_log);
7726 }
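
/*
 * Illustrative sketch mirroring existing callers (not a new user in this
 * file): reporting a parse error for a command string.  The example_errs[]
 * array, the "example" location string and example_report() are invented
 * for the example.
 *
 *	static const char *example_errs[] = { "Unknown field", "Bad operator" };
 *
 *	static void example_report(struct trace_array *tr, char *cmd,
 *				   const char *tok)
 *	{
 *		tracing_log_err(tr, "example", cmd, example_errs,
 *				0, err_pos(cmd, tok));
 *	}
 */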
7727 
7728 static void clear_tracing_err_log(struct trace_array *tr)
7729 {
7730 	struct tracing_log_err *err, *next;
7731 
7732 	mutex_lock(&tracing_err_log_lock);
7733 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7734 		list_del(&err->list);
7735 		free_tracing_log_err(err);
7736 	}
7737 
7738 	tr->n_err_log_entries = 0;
7739 	mutex_unlock(&tracing_err_log_lock);
7740 }
7741 
7742 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7743 {
7744 	struct trace_array *tr = m->private;
7745 
7746 	mutex_lock(&tracing_err_log_lock);
7747 
7748 	return seq_list_start(&tr->err_log, *pos);
7749 }
7750 
7751 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7752 {
7753 	struct trace_array *tr = m->private;
7754 
7755 	return seq_list_next(v, &tr->err_log, pos);
7756 }
7757 
7758 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7759 {
7760 	mutex_unlock(&tracing_err_log_lock);
7761 }
7762 
7763 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7764 {
7765 	u16 i;
7766 
7767 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7768 		seq_putc(m, ' ');
7769 	for (i = 0; i < pos; i++)
7770 		seq_putc(m, ' ');
7771 	seq_puts(m, "^\n");
7772 }
7773 
7774 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7775 {
7776 	struct tracing_log_err *err = v;
7777 
7778 	if (err) {
7779 		const char *err_text = err->info.errs[err->info.type];
7780 		u64 sec = err->info.ts;
7781 		u32 nsec;
7782 
7783 		nsec = do_div(sec, NSEC_PER_SEC);
7784 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7785 			   err->loc, err_text);
7786 		seq_printf(m, "%s", err->cmd);
7787 		tracing_err_log_show_pos(m, err->info.pos);
7788 	}
7789 
7790 	return 0;
7791 }
7792 
7793 static const struct seq_operations tracing_err_log_seq_ops = {
7794 	.start  = tracing_err_log_seq_start,
7795 	.next   = tracing_err_log_seq_next,
7796 	.stop   = tracing_err_log_seq_stop,
7797 	.show   = tracing_err_log_seq_show
7798 };
7799 
7800 static int tracing_err_log_open(struct inode *inode, struct file *file)
7801 {
7802 	struct trace_array *tr = inode->i_private;
7803 	int ret = 0;
7804 
7805 	ret = tracing_check_open_get_tr(tr);
7806 	if (ret)
7807 		return ret;
7808 
7809 	/* If this file was opened for write, then erase contents */
7810 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7811 		clear_tracing_err_log(tr);
7812 
7813 	if (file->f_mode & FMODE_READ) {
7814 		ret = seq_open(file, &tracing_err_log_seq_ops);
7815 		if (!ret) {
7816 			struct seq_file *m = file->private_data;
7817 			m->private = tr;
7818 		} else {
7819 			trace_array_put(tr);
7820 		}
7821 	}
7822 	return ret;
7823 }
7824 
7825 static ssize_t tracing_err_log_write(struct file *file,
7826 				     const char __user *buffer,
7827 				     size_t count, loff_t *ppos)
7828 {
7829 	return count;
7830 }
7831 
7832 static int tracing_err_log_release(struct inode *inode, struct file *file)
7833 {
7834 	struct trace_array *tr = inode->i_private;
7835 
7836 	trace_array_put(tr);
7837 
7838 	if (file->f_mode & FMODE_READ)
7839 		seq_release(inode, file);
7840 
7841 	return 0;
7842 }
7843 
7844 static const struct file_operations tracing_err_log_fops = {
7845 	.open           = tracing_err_log_open,
7846 	.write		= tracing_err_log_write,
7847 	.read           = seq_read,
7848 	.llseek         = tracing_lseek,
7849 	.release        = tracing_err_log_release,
7850 };
7851 
7852 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7853 {
7854 	struct trace_array *tr = inode->i_private;
7855 	struct ftrace_buffer_info *info;
7856 	int ret;
7857 
7858 	ret = tracing_check_open_get_tr(tr);
7859 	if (ret)
7860 		return ret;
7861 
7862 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7863 	if (!info) {
7864 		trace_array_put(tr);
7865 		return -ENOMEM;
7866 	}
7867 
7868 	mutex_lock(&trace_types_lock);
7869 
7870 	info->iter.tr		= tr;
7871 	info->iter.cpu_file	= tracing_get_cpu(inode);
7872 	info->iter.trace	= tr->current_trace;
7873 	info->iter.array_buffer = &tr->array_buffer;
7874 	info->spare		= NULL;
7875 	/* Force reading ring buffer for first read */
7876 	info->read		= (unsigned int)-1;
7877 
7878 	filp->private_data = info;
7879 
7880 	tr->trace_ref++;
7881 
7882 	mutex_unlock(&trace_types_lock);
7883 
7884 	ret = nonseekable_open(inode, filp);
7885 	if (ret < 0)
7886 		trace_array_put(tr);
7887 
7888 	return ret;
7889 }
7890 
7891 static __poll_t
7892 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7893 {
7894 	struct ftrace_buffer_info *info = filp->private_data;
7895 	struct trace_iterator *iter = &info->iter;
7896 
7897 	return trace_poll(iter, filp, poll_table);
7898 }
7899 
7900 static ssize_t
7901 tracing_buffers_read(struct file *filp, char __user *ubuf,
7902 		     size_t count, loff_t *ppos)
7903 {
7904 	struct ftrace_buffer_info *info = filp->private_data;
7905 	struct trace_iterator *iter = &info->iter;
7906 	void *trace_data;
7907 	int page_size;
7908 	ssize_t ret = 0;
7909 	ssize_t size;
7910 
7911 	if (!count)
7912 		return 0;
7913 
7914 #ifdef CONFIG_TRACER_MAX_TRACE
7915 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7916 		return -EBUSY;
7917 #endif
7918 
7919 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
7920 
7921 	/* Make sure the spare matches the current sub buffer size */
7922 	if (info->spare) {
7923 		if (page_size != info->spare_size) {
7924 			ring_buffer_free_read_page(iter->array_buffer->buffer,
7925 						   info->spare_cpu, info->spare);
7926 			info->spare = NULL;
7927 		}
7928 	}
7929 
7930 	if (!info->spare) {
7931 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7932 							  iter->cpu_file);
7933 		if (IS_ERR(info->spare)) {
7934 			ret = PTR_ERR(info->spare);
7935 			info->spare = NULL;
7936 		} else {
7937 			info->spare_cpu = iter->cpu_file;
7938 			info->spare_size = page_size;
7939 		}
7940 	}
7941 	if (!info->spare)
7942 		return ret;
7943 
7944 	/* Do we have previous read data to read? */
7945 	if (info->read < page_size)
7946 		goto read;
7947 
7948  again:
7949 	trace_access_lock(iter->cpu_file);
7950 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7951 				    info->spare,
7952 				    count,
7953 				    iter->cpu_file, 0);
7954 	trace_access_unlock(iter->cpu_file);
7955 
7956 	if (ret < 0) {
7957 		if (trace_empty(iter) && !iter->closed) {
7958 			if ((filp->f_flags & O_NONBLOCK))
7959 				return -EAGAIN;
7960 
7961 			ret = wait_on_pipe(iter, 0);
7962 			if (ret)
7963 				return ret;
7964 
7965 			goto again;
7966 		}
7967 		return 0;
7968 	}
7969 
7970 	info->read = 0;
7971  read:
7972 	size = page_size - info->read;
7973 	if (size > count)
7974 		size = count;
7975 	trace_data = ring_buffer_read_page_data(info->spare);
7976 	ret = copy_to_user(ubuf, trace_data + info->read, size);
7977 	if (ret == size)
7978 		return -EFAULT;
7979 
7980 	size -= ret;
7981 
7982 	*ppos += size;
7983 	info->read += size;
7984 
7985 	return size;
7986 }
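
/*
 * Illustrative sketch, not part of this file's build: consuming binary
 * sub-buffers from a per-CPU trace_pipe_raw file, which is what
 * tracing_buffers_read() above serves.  The path, the 4 KiB buffer, the
 * helper name and consume() are assumptions; the real sub-buffer size is
 * reported by buffer_subbuf_size_kb.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static void drain_cpu0(void)
 *	{
 *		char page[4096];
 *		int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *			      O_RDONLY);
 *		ssize_t r;
 *
 *		if (fd < 0)
 *			return;
 *		while ((r = read(fd, page, sizeof(page))) > 0)
 *			consume(page, r);	// parse one sub-buffer (assumed helper)
 *		close(fd);
 *	}
 */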
7987 
7988 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
7989 {
7990 	struct ftrace_buffer_info *info = file->private_data;
7991 	struct trace_iterator *iter = &info->iter;
7992 
7993 	iter->closed = true;
7994 	/* Make sure the waiters see the new wait_index */
7995 	(void)atomic_fetch_inc_release(&iter->wait_index);
7996 
7997 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
7998 
7999 	return 0;
8000 }
8001 
8002 static int tracing_buffers_release(struct inode *inode, struct file *file)
8003 {
8004 	struct ftrace_buffer_info *info = file->private_data;
8005 	struct trace_iterator *iter = &info->iter;
8006 
8007 	mutex_lock(&trace_types_lock);
8008 
8009 	iter->tr->trace_ref--;
8010 
8011 	__trace_array_put(iter->tr);
8012 
8013 	if (info->spare)
8014 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8015 					   info->spare_cpu, info->spare);
8016 	kvfree(info);
8017 
8018 	mutex_unlock(&trace_types_lock);
8019 
8020 	return 0;
8021 }
8022 
8023 struct buffer_ref {
8024 	struct trace_buffer	*buffer;
8025 	void			*page;
8026 	int			cpu;
8027 	refcount_t		refcount;
8028 };
8029 
8030 static void buffer_ref_release(struct buffer_ref *ref)
8031 {
8032 	if (!refcount_dec_and_test(&ref->refcount))
8033 		return;
8034 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8035 	kfree(ref);
8036 }
8037 
8038 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8039 				    struct pipe_buffer *buf)
8040 {
8041 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8042 
8043 	buffer_ref_release(ref);
8044 	buf->private = 0;
8045 }
8046 
8047 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8048 				struct pipe_buffer *buf)
8049 {
8050 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8051 
8052 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8053 		return false;
8054 
8055 	refcount_inc(&ref->refcount);
8056 	return true;
8057 }
8058 
8059 /* Pipe buffer operations for a buffer. */
8060 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8061 	.release		= buffer_pipe_buf_release,
8062 	.get			= buffer_pipe_buf_get,
8063 };
8064 
8065 /*
8066  * Callback from splice_to_pipe(), if we need to release some pages
8067  * at the end of the spd in case we error'ed out in filling the pipe.
8068  * at the end of the spd in case we errored out while filling the pipe.
8069 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8070 {
8071 	struct buffer_ref *ref =
8072 		(struct buffer_ref *)spd->partial[i].private;
8073 
8074 	buffer_ref_release(ref);
8075 	spd->partial[i].private = 0;
8076 }
8077 
8078 static ssize_t
8079 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8080 			    struct pipe_inode_info *pipe, size_t len,
8081 			    unsigned int flags)
8082 {
8083 	struct ftrace_buffer_info *info = file->private_data;
8084 	struct trace_iterator *iter = &info->iter;
8085 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8086 	struct page *pages_def[PIPE_DEF_BUFFERS];
8087 	struct splice_pipe_desc spd = {
8088 		.pages		= pages_def,
8089 		.partial	= partial_def,
8090 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8091 		.ops		= &buffer_pipe_buf_ops,
8092 		.spd_release	= buffer_spd_release,
8093 	};
8094 	struct buffer_ref *ref;
8095 	bool woken = false;
8096 	int page_size;
8097 	int entries, i;
8098 	ssize_t ret = 0;
8099 
8100 #ifdef CONFIG_TRACER_MAX_TRACE
8101 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8102 		return -EBUSY;
8103 #endif
8104 
8105 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8106 	if (*ppos & (page_size - 1))
8107 		return -EINVAL;
8108 
8109 	if (len & (page_size - 1)) {
8110 		if (len < page_size)
8111 			return -EINVAL;
8112 		len &= (~(page_size - 1));
8113 	}
8114 
8115 	if (splice_grow_spd(pipe, &spd))
8116 		return -ENOMEM;
8117 
8118  again:
8119 	trace_access_lock(iter->cpu_file);
8120 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8121 
8122 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8123 		struct page *page;
8124 		int r;
8125 
8126 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8127 		if (!ref) {
8128 			ret = -ENOMEM;
8129 			break;
8130 		}
8131 
8132 		refcount_set(&ref->refcount, 1);
8133 		ref->buffer = iter->array_buffer->buffer;
8134 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8135 		if (IS_ERR(ref->page)) {
8136 			ret = PTR_ERR(ref->page);
8137 			ref->page = NULL;
8138 			kfree(ref);
8139 			break;
8140 		}
8141 		ref->cpu = iter->cpu_file;
8142 
8143 		r = ring_buffer_read_page(ref->buffer, ref->page,
8144 					  len, iter->cpu_file, 1);
8145 		if (r < 0) {
8146 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8147 						   ref->page);
8148 			kfree(ref);
8149 			break;
8150 		}
8151 
8152 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8153 
8154 		spd.pages[i] = page;
8155 		spd.partial[i].len = page_size;
8156 		spd.partial[i].offset = 0;
8157 		spd.partial[i].private = (unsigned long)ref;
8158 		spd.nr_pages++;
8159 		*ppos += page_size;
8160 
8161 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8162 	}
8163 
8164 	trace_access_unlock(iter->cpu_file);
8165 	spd.nr_pages = i;
8166 
8167 	/* did we read anything? */
8168 	if (!spd.nr_pages) {
8169 
8170 		if (ret)
8171 			goto out;
8172 
8173 		if (woken)
8174 			goto out;
8175 
8176 		ret = -EAGAIN;
8177 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8178 			goto out;
8179 
8180 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8181 		if (ret)
8182 			goto out;
8183 
8184 		/* No need to wait after waking up when tracing is off */
8185 		if (!tracer_tracing_is_on(iter->tr))
8186 			goto out;
8187 
8188 		/* Iterate one more time to collect any new data then exit */
8189 		woken = true;
8190 
8191 		goto again;
8192 	}
8193 
8194 	ret = splice_to_pipe(pipe, &spd);
8195 out:
8196 	splice_shrink_spd(&spd);
8197 
8198 	return ret;
8199 }
8200 
8201 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8202 {
8203 	struct ftrace_buffer_info *info = file->private_data;
8204 	struct trace_iterator *iter = &info->iter;
8205 	int err;
8206 
8207 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8208 		if (!(file->f_flags & O_NONBLOCK)) {
8209 			err = ring_buffer_wait(iter->array_buffer->buffer,
8210 					       iter->cpu_file,
8211 					       iter->tr->buffer_percent,
8212 					       NULL, NULL);
8213 			if (err)
8214 				return err;
8215 		}
8216 
8217 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8218 						  iter->cpu_file);
8219 	} else if (cmd) {
8220 		return -ENOTTY;
8221 	}
8222 
8223 	/*
8224 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8225 	 * waiters
8226 	 */
8227 	mutex_lock(&trace_types_lock);
8228 
8229 	/* Make sure the waiters see the new wait_index */
8230 	(void)atomic_fetch_inc_release(&iter->wait_index);
8231 
8232 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8233 
8234 	mutex_unlock(&trace_types_lock);
8235 	return 0;
8236 }
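
/*
 * Illustrative sketch, not part of this file's build: how a user of the
 * memory-mapped ring buffer might reach the TRACE_MMAP_IOCTL_GET_READER
 * path above.  Error handling and the meta-page bookkeeping are omitted;
 * the tracefs path and helper name are assumptions.
 *
 *	#include <fcntl.h>
 *	#include <sys/ioctl.h>
 *	#include <unistd.h>
 *	#include <linux/trace_mmap.h>
 *
 *	static void get_reader(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *			      O_RDONLY | O_NONBLOCK);
 *
 *		if (fd < 0)
 *			return;
 *		// Ask the kernel to publish a new reader sub-buffer.  Without
 *		// O_NONBLOCK this may first wait for buffer_percent.
 *		ioctl(fd, TRACE_MMAP_IOCTL_GET_READER);
 *		close(fd);
 *	}
 */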
8237 
8238 #ifdef CONFIG_TRACER_MAX_TRACE
8239 static int get_snapshot_map(struct trace_array *tr)
8240 {
8241 	int err = 0;
8242 
8243 	/*
8244 	 * Called with mmap_lock held. lockdep would be unhappy if we would now
8245 	 * take trace_types_lock. Instead use the specific
8246 	 * snapshot_trigger_lock.
8247 	 */
8248 	spin_lock(&tr->snapshot_trigger_lock);
8249 
8250 	if (tr->snapshot || tr->mapped == UINT_MAX)
8251 		err = -EBUSY;
8252 	else
8253 		tr->mapped++;
8254 
8255 	spin_unlock(&tr->snapshot_trigger_lock);
8256 
8257 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8258 	if (tr->mapped == 1)
8259 		synchronize_rcu();
8260 
8261 	return err;
8262 
8263 }
8264 static void put_snapshot_map(struct trace_array *tr)
8265 {
8266 	spin_lock(&tr->snapshot_trigger_lock);
8267 	if (!WARN_ON(!tr->mapped))
8268 		tr->mapped--;
8269 	spin_unlock(&tr->snapshot_trigger_lock);
8270 }
8271 #else
8272 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8273 static inline void put_snapshot_map(struct trace_array *tr) { }
8274 #endif
8275 
8276 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8277 {
8278 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8279 	struct trace_iterator *iter = &info->iter;
8280 
8281 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8282 	put_snapshot_map(iter->tr);
8283 }
8284 
8285 static const struct vm_operations_struct tracing_buffers_vmops = {
8286 	.close		= tracing_buffers_mmap_close,
8287 };
8288 
8289 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8290 {
8291 	struct ftrace_buffer_info *info = filp->private_data;
8292 	struct trace_iterator *iter = &info->iter;
8293 	int ret = 0;
8294 
8295 	/* Currently the boot mapped buffer is not supported for mmap */
8296 	if (iter->tr->flags & TRACE_ARRAY_FL_BOOT)
8297 		return -ENODEV;
8298 
8299 	ret = get_snapshot_map(iter->tr);
8300 	if (ret)
8301 		return ret;
8302 
8303 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8304 	if (ret)
8305 		put_snapshot_map(iter->tr);
8306 
8307 	vma->vm_ops = &tracing_buffers_vmops;
8308 
8309 	return ret;
8310 }
8311 
8312 static const struct file_operations tracing_buffers_fops = {
8313 	.open		= tracing_buffers_open,
8314 	.read		= tracing_buffers_read,
8315 	.poll		= tracing_buffers_poll,
8316 	.release	= tracing_buffers_release,
8317 	.flush		= tracing_buffers_flush,
8318 	.splice_read	= tracing_buffers_splice_read,
8319 	.unlocked_ioctl = tracing_buffers_ioctl,
8320 	.mmap		= tracing_buffers_mmap,
8321 };
8322 
8323 static ssize_t
8324 tracing_stats_read(struct file *filp, char __user *ubuf,
8325 		   size_t count, loff_t *ppos)
8326 {
8327 	struct inode *inode = file_inode(filp);
8328 	struct trace_array *tr = inode->i_private;
8329 	struct array_buffer *trace_buf = &tr->array_buffer;
8330 	int cpu = tracing_get_cpu(inode);
8331 	struct trace_seq *s;
8332 	unsigned long cnt;
8333 	unsigned long long t;
8334 	unsigned long usec_rem;
8335 
8336 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8337 	if (!s)
8338 		return -ENOMEM;
8339 
8340 	trace_seq_init(s);
8341 
8342 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8343 	trace_seq_printf(s, "entries: %ld\n", cnt);
8344 
8345 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8346 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8347 
8348 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8349 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8350 
8351 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8352 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8353 
8354 	if (trace_clocks[tr->clock_id].in_ns) {
8355 		/* local or global for trace_clock */
8356 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8357 		usec_rem = do_div(t, USEC_PER_SEC);
8358 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8359 								t, usec_rem);
8360 
8361 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8362 		usec_rem = do_div(t, USEC_PER_SEC);
8363 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8364 	} else {
8365 		/* counter or tsc mode for trace_clock */
8366 		trace_seq_printf(s, "oldest event ts: %llu\n",
8367 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8368 
8369 		trace_seq_printf(s, "now ts: %llu\n",
8370 				ring_buffer_time_stamp(trace_buf->buffer));
8371 	}
8372 
8373 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8374 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8375 
8376 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8377 	trace_seq_printf(s, "read events: %ld\n", cnt);
8378 
8379 	count = simple_read_from_buffer(ubuf, count, ppos,
8380 					s->buffer, trace_seq_used(s));
8381 
8382 	kfree(s);
8383 
8384 	return count;
8385 }
8386 
8387 static const struct file_operations tracing_stats_fops = {
8388 	.open		= tracing_open_generic_tr,
8389 	.read		= tracing_stats_read,
8390 	.llseek		= generic_file_llseek,
8391 	.release	= tracing_release_generic_tr,
8392 };
8393 
8394 #ifdef CONFIG_DYNAMIC_FTRACE
8395 
8396 static ssize_t
8397 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8398 		  size_t cnt, loff_t *ppos)
8399 {
8400 	ssize_t ret;
8401 	char *buf;
8402 	int r;
8403 
8404 	/* 256 should be plenty to hold the amount needed */
8405 	buf = kmalloc(256, GFP_KERNEL);
8406 	if (!buf)
8407 		return -ENOMEM;
8408 
8409 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8410 		      ftrace_update_tot_cnt,
8411 		      ftrace_number_of_pages,
8412 		      ftrace_number_of_groups);
8413 
8414 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8415 	kfree(buf);
8416 	return ret;
8417 }
8418 
8419 static const struct file_operations tracing_dyn_info_fops = {
8420 	.open		= tracing_open_generic,
8421 	.read		= tracing_read_dyn_info,
8422 	.llseek		= generic_file_llseek,
8423 };
8424 #endif /* CONFIG_DYNAMIC_FTRACE */
8425 
8426 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8427 static void
8428 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8429 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8430 		void *data)
8431 {
8432 	tracing_snapshot_instance(tr);
8433 }
8434 
8435 static void
8436 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8437 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8438 		      void *data)
8439 {
8440 	struct ftrace_func_mapper *mapper = data;
8441 	long *count = NULL;
8442 
8443 	if (mapper)
8444 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8445 
8446 	if (count) {
8447 
8448 		if (*count <= 0)
8449 			return;
8450 
8451 		(*count)--;
8452 	}
8453 
8454 	tracing_snapshot_instance(tr);
8455 }
8456 
8457 static int
8458 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8459 		      struct ftrace_probe_ops *ops, void *data)
8460 {
8461 	struct ftrace_func_mapper *mapper = data;
8462 	long *count = NULL;
8463 
8464 	seq_printf(m, "%ps:", (void *)ip);
8465 
8466 	seq_puts(m, "snapshot");
8467 
8468 	if (mapper)
8469 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8470 
8471 	if (count)
8472 		seq_printf(m, ":count=%ld\n", *count);
8473 	else
8474 		seq_puts(m, ":unlimited\n");
8475 
8476 	return 0;
8477 }
8478 
8479 static int
8480 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8481 		     unsigned long ip, void *init_data, void **data)
8482 {
8483 	struct ftrace_func_mapper *mapper = *data;
8484 
8485 	if (!mapper) {
8486 		mapper = allocate_ftrace_func_mapper();
8487 		if (!mapper)
8488 			return -ENOMEM;
8489 		*data = mapper;
8490 	}
8491 
8492 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8493 }
8494 
8495 static void
8496 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8497 		     unsigned long ip, void *data)
8498 {
8499 	struct ftrace_func_mapper *mapper = data;
8500 
8501 	if (!ip) {
8502 		if (!mapper)
8503 			return;
8504 		free_ftrace_func_mapper(mapper, NULL);
8505 		return;
8506 	}
8507 
8508 	ftrace_func_mapper_remove_ip(mapper, ip);
8509 }
8510 
8511 static struct ftrace_probe_ops snapshot_probe_ops = {
8512 	.func			= ftrace_snapshot,
8513 	.print			= ftrace_snapshot_print,
8514 };
8515 
8516 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8517 	.func			= ftrace_count_snapshot,
8518 	.print			= ftrace_snapshot_print,
8519 	.init			= ftrace_snapshot_init,
8520 	.free			= ftrace_snapshot_free,
8521 };
8522 
8523 static int
8524 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8525 			       char *glob, char *cmd, char *param, int enable)
8526 {
8527 	struct ftrace_probe_ops *ops;
8528 	void *count = (void *)-1;
8529 	char *number;
8530 	int ret;
8531 
8532 	if (!tr)
8533 		return -ENODEV;
8534 
8535 	/* hash funcs only work with set_ftrace_filter */
8536 	if (!enable)
8537 		return -EINVAL;
8538 
8539 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8540 
8541 	if (glob[0] == '!') {
8542 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8543 		if (!ret)
8544 			tracing_disarm_snapshot(tr);
8545 
8546 		return ret;
8547 	}
8548 
8549 	if (!param)
8550 		goto out_reg;
8551 
8552 	number = strsep(&param, ":");
8553 
8554 	if (!strlen(number))
8555 		goto out_reg;
8556 
8557 	/*
8558 	 * We use the callback data field (which is a pointer)
8559 	 * as our counter.
8560 	 */
8561 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8562 	if (ret)
8563 		return ret;
8564 
8565  out_reg:
8566 	ret = tracing_arm_snapshot(tr);
8567 	if (ret < 0)
8568 		return ret;
8569 
8570 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8571 	if (ret < 0)
8572 		tracing_disarm_snapshot(tr);
8573 
8574 	return ret < 0 ? ret : 0;
8575 }
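
/*
 * Illustrative sketch, not part of this file's build: the command syntax
 * parsed by ftrace_trace_snapshot_callback().  Appending "<func>:snapshot"
 * to set_ftrace_filter takes a snapshot whenever <func> is called; an
 * optional ":count" limits how many snapshots are taken.  The tracefs path
 * and helper name are assumptions.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static void snapshot_on_kfree(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/set_ftrace_filter",
 *			      O_WRONLY | O_APPEND);
 *
 *		if (fd >= 0) {
 *			// snapshot at most 5 times when kfree() is hit
 *			write(fd, "kfree:snapshot:5", 16);
 *			close(fd);
 *		}
 *	}
 */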
8576 
8577 static struct ftrace_func_command ftrace_snapshot_cmd = {
8578 	.name			= "snapshot",
8579 	.func			= ftrace_trace_snapshot_callback,
8580 };
8581 
8582 static __init int register_snapshot_cmd(void)
8583 {
8584 	return register_ftrace_command(&ftrace_snapshot_cmd);
8585 }
8586 #else
8587 static inline __init int register_snapshot_cmd(void) { return 0; }
8588 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8589 
8590 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8591 {
8592 	if (WARN_ON(!tr->dir))
8593 		return ERR_PTR(-ENODEV);
8594 
8595 	/* Top directory uses NULL as the parent */
8596 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8597 		return NULL;
8598 
8599 	/* All sub buffers have a descriptor */
8600 	return tr->dir;
8601 }
8602 
8603 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8604 {
8605 	struct dentry *d_tracer;
8606 
8607 	if (tr->percpu_dir)
8608 		return tr->percpu_dir;
8609 
8610 	d_tracer = tracing_get_dentry(tr);
8611 	if (IS_ERR(d_tracer))
8612 		return NULL;
8613 
8614 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8615 
8616 	MEM_FAIL(!tr->percpu_dir,
8617 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8618 
8619 	return tr->percpu_dir;
8620 }
8621 
8622 static struct dentry *
8623 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8624 		      void *data, long cpu, const struct file_operations *fops)
8625 {
8626 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8627 
8628 	if (ret) /* See tracing_get_cpu() */
8629 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8630 	return ret;
8631 }
8632 
8633 static void
8634 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8635 {
8636 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8637 	struct dentry *d_cpu;
8638 	char cpu_dir[30]; /* 30 characters should be more than enough */
8639 
8640 	if (!d_percpu)
8641 		return;
8642 
8643 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8644 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8645 	if (!d_cpu) {
8646 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8647 		return;
8648 	}
8649 
8650 	/* per cpu trace_pipe */
8651 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8652 				tr, cpu, &tracing_pipe_fops);
8653 
8654 	/* per cpu trace */
8655 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8656 				tr, cpu, &tracing_fops);
8657 
8658 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8659 				tr, cpu, &tracing_buffers_fops);
8660 
8661 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8662 				tr, cpu, &tracing_stats_fops);
8663 
8664 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8665 				tr, cpu, &tracing_entries_fops);
8666 
8667 	if (tr->range_addr_start)
8668 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
8669 				      tr, cpu, &tracing_buffer_meta_fops);
8670 #ifdef CONFIG_TRACER_SNAPSHOT
8671 	if (!tr->range_addr_start) {
8672 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8673 				      tr, cpu, &snapshot_fops);
8674 
8675 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8676 				      tr, cpu, &snapshot_raw_fops);
8677 	}
8678 #endif
8679 }
8680 
8681 #ifdef CONFIG_FTRACE_SELFTEST
8682 /* Let selftest have access to static functions in this file */
8683 #include "trace_selftest.c"
8684 #endif
8685 
8686 static ssize_t
8687 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8688 			loff_t *ppos)
8689 {
8690 	struct trace_option_dentry *topt = filp->private_data;
8691 	char *buf;
8692 
8693 	if (topt->flags->val & topt->opt->bit)
8694 		buf = "1\n";
8695 	else
8696 		buf = "0\n";
8697 
8698 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8699 }
8700 
8701 static ssize_t
8702 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8703 			 loff_t *ppos)
8704 {
8705 	struct trace_option_dentry *topt = filp->private_data;
8706 	unsigned long val;
8707 	int ret;
8708 
8709 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8710 	if (ret)
8711 		return ret;
8712 
8713 	if (val != 0 && val != 1)
8714 		return -EINVAL;
8715 
8716 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8717 		mutex_lock(&trace_types_lock);
8718 		ret = __set_tracer_option(topt->tr, topt->flags,
8719 					  topt->opt, !val);
8720 		mutex_unlock(&trace_types_lock);
8721 		if (ret)
8722 			return ret;
8723 	}
8724 
8725 	*ppos += cnt;
8726 
8727 	return cnt;
8728 }
8729 
8730 static int tracing_open_options(struct inode *inode, struct file *filp)
8731 {
8732 	struct trace_option_dentry *topt = inode->i_private;
8733 	int ret;
8734 
8735 	ret = tracing_check_open_get_tr(topt->tr);
8736 	if (ret)
8737 		return ret;
8738 
8739 	filp->private_data = inode->i_private;
8740 	return 0;
8741 }
8742 
8743 static int tracing_release_options(struct inode *inode, struct file *file)
8744 {
8745 	struct trace_option_dentry *topt = file->private_data;
8746 
8747 	trace_array_put(topt->tr);
8748 	return 0;
8749 }
8750 
8751 static const struct file_operations trace_options_fops = {
8752 	.open = tracing_open_options,
8753 	.read = trace_options_read,
8754 	.write = trace_options_write,
8755 	.llseek	= generic_file_llseek,
8756 	.release = tracing_release_options,
8757 };
8758 
8759 /*
8760  * In order to pass in both the trace_array descriptor as well as the index
8761  * to the flag that the trace option file represents, the trace_array
8762  * has a character array of trace_flags_index[], which holds the index
8763  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8764  * The address of this character array is passed to the flag option file
8765  * read/write callbacks.
8766  *
8767  * In order to extract both the index and the trace_array descriptor,
8768  * get_tr_index() uses the following algorithm.
8769  *
8770  *   idx = *ptr;
8771  *
8772  * As the pointer itself contains the address of the index (remember
8773  * index[1] == 1).
8774  *
8775  * Then to get the trace_array descriptor, by subtracting that index
8776  * from the ptr, we get to the start of the index itself.
8777  *
8778  *   ptr - idx == &index[0]
8779  *
8780  * Then a simple container_of() from that pointer gets us to the
8781  * trace_array descriptor.
8782  */
8783 static void get_tr_index(void *data, struct trace_array **ptr,
8784 			 unsigned int *pindex)
8785 {
8786 	*pindex = *(unsigned char *)data;
8787 
8788 	*ptr = container_of(data - *pindex, struct trace_array,
8789 			    trace_flags_index);
8790 }
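
/*
 * Worked example of the scheme above (illustrative only, not used by the
 * code): assuming init_trace_flags_index() filled trace_flags_index[] so
 * that index[i] == i, the option file for flag bit 3 stores
 * &tr->trace_flags_index[3] as its private data, and get_tr_index()
 * recovers both pieces like so:
 *
 *	unsigned char *data = &tr->trace_flags_index[3];
 *	unsigned int idx = *data;		// idx == 3
 *	void *base = (void *)data - idx;	// == &tr->trace_flags_index[0]
 *	struct trace_array *tr2 =
 *		container_of(base, struct trace_array, trace_flags_index);
 *	// tr2 == tr, and (1 << idx) is the flag bit this file controls
 */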
8791 
8792 static ssize_t
8793 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8794 			loff_t *ppos)
8795 {
8796 	void *tr_index = filp->private_data;
8797 	struct trace_array *tr;
8798 	unsigned int index;
8799 	char *buf;
8800 
8801 	get_tr_index(tr_index, &tr, &index);
8802 
8803 	if (tr->trace_flags & (1 << index))
8804 		buf = "1\n";
8805 	else
8806 		buf = "0\n";
8807 
8808 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8809 }
8810 
8811 static ssize_t
8812 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8813 			 loff_t *ppos)
8814 {
8815 	void *tr_index = filp->private_data;
8816 	struct trace_array *tr;
8817 	unsigned int index;
8818 	unsigned long val;
8819 	int ret;
8820 
8821 	get_tr_index(tr_index, &tr, &index);
8822 
8823 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8824 	if (ret)
8825 		return ret;
8826 
8827 	if (val != 0 && val != 1)
8828 		return -EINVAL;
8829 
8830 	mutex_lock(&event_mutex);
8831 	mutex_lock(&trace_types_lock);
8832 	ret = set_tracer_flag(tr, 1 << index, val);
8833 	mutex_unlock(&trace_types_lock);
8834 	mutex_unlock(&event_mutex);
8835 
8836 	if (ret < 0)
8837 		return ret;
8838 
8839 	*ppos += cnt;
8840 
8841 	return cnt;
8842 }
8843 
8844 static const struct file_operations trace_options_core_fops = {
8845 	.open = tracing_open_generic,
8846 	.read = trace_options_core_read,
8847 	.write = trace_options_core_write,
8848 	.llseek = generic_file_llseek,
8849 };
8850 
8851 struct dentry *trace_create_file(const char *name,
8852 				 umode_t mode,
8853 				 struct dentry *parent,
8854 				 void *data,
8855 				 const struct file_operations *fops)
8856 {
8857 	struct dentry *ret;
8858 
8859 	ret = tracefs_create_file(name, mode, parent, data, fops);
8860 	if (!ret)
8861 		pr_warn("Could not create tracefs '%s' entry\n", name);
8862 
8863 	return ret;
8864 }
8865 
8866 
8867 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8868 {
8869 	struct dentry *d_tracer;
8870 
8871 	if (tr->options)
8872 		return tr->options;
8873 
8874 	d_tracer = tracing_get_dentry(tr);
8875 	if (IS_ERR(d_tracer))
8876 		return NULL;
8877 
8878 	tr->options = tracefs_create_dir("options", d_tracer);
8879 	if (!tr->options) {
8880 		pr_warn("Could not create tracefs directory 'options'\n");
8881 		return NULL;
8882 	}
8883 
8884 	return tr->options;
8885 }
8886 
8887 static void
8888 create_trace_option_file(struct trace_array *tr,
8889 			 struct trace_option_dentry *topt,
8890 			 struct tracer_flags *flags,
8891 			 struct tracer_opt *opt)
8892 {
8893 	struct dentry *t_options;
8894 
8895 	t_options = trace_options_init_dentry(tr);
8896 	if (!t_options)
8897 		return;
8898 
8899 	topt->flags = flags;
8900 	topt->opt = opt;
8901 	topt->tr = tr;
8902 
8903 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8904 					t_options, topt, &trace_options_fops);
8905 
8906 }
8907 
8908 static void
8909 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8910 {
8911 	struct trace_option_dentry *topts;
8912 	struct trace_options *tr_topts;
8913 	struct tracer_flags *flags;
8914 	struct tracer_opt *opts;
8915 	int cnt;
8916 	int i;
8917 
8918 	if (!tracer)
8919 		return;
8920 
8921 	flags = tracer->flags;
8922 
8923 	if (!flags || !flags->opts)
8924 		return;
8925 
8926 	/*
8927 	 * If this is an instance, only create flags for tracers
8928 	 * the instance may have.
8929 	 */
8930 	if (!trace_ok_for_array(tracer, tr))
8931 		return;
8932 
8933 	for (i = 0; i < tr->nr_topts; i++) {
8934 		/* Make sure there's no duplicate flags. */
8935 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8936 			return;
8937 	}
8938 
8939 	opts = flags->opts;
8940 
8941 	for (cnt = 0; opts[cnt].name; cnt++)
8942 		;
8943 
8944 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8945 	if (!topts)
8946 		return;
8947 
8948 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8949 			    GFP_KERNEL);
8950 	if (!tr_topts) {
8951 		kfree(topts);
8952 		return;
8953 	}
8954 
8955 	tr->topts = tr_topts;
8956 	tr->topts[tr->nr_topts].tracer = tracer;
8957 	tr->topts[tr->nr_topts].topts = topts;
8958 	tr->nr_topts++;
8959 
8960 	for (cnt = 0; opts[cnt].name; cnt++) {
8961 		create_trace_option_file(tr, &topts[cnt], flags,
8962 					 &opts[cnt]);
8963 		MEM_FAIL(topts[cnt].entry == NULL,
8964 			  "Failed to create trace option: %s",
8965 			  opts[cnt].name);
8966 	}
8967 }
8968 
8969 static struct dentry *
8970 create_trace_option_core_file(struct trace_array *tr,
8971 			      const char *option, long index)
8972 {
8973 	struct dentry *t_options;
8974 
8975 	t_options = trace_options_init_dentry(tr);
8976 	if (!t_options)
8977 		return NULL;
8978 
8979 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8980 				 (void *)&tr->trace_flags_index[index],
8981 				 &trace_options_core_fops);
8982 }
8983 
8984 static void create_trace_options_dir(struct trace_array *tr)
8985 {
8986 	struct dentry *t_options;
8987 	bool top_level = tr == &global_trace;
8988 	int i;
8989 
8990 	t_options = trace_options_init_dentry(tr);
8991 	if (!t_options)
8992 		return;
8993 
8994 	for (i = 0; trace_options[i]; i++) {
8995 		if (top_level ||
8996 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8997 			create_trace_option_core_file(tr, trace_options[i], i);
8998 	}
8999 }
9000 
9001 static ssize_t
9002 rb_simple_read(struct file *filp, char __user *ubuf,
9003 	       size_t cnt, loff_t *ppos)
9004 {
9005 	struct trace_array *tr = filp->private_data;
9006 	char buf[64];
9007 	int r;
9008 
9009 	r = tracer_tracing_is_on(tr);
9010 	r = sprintf(buf, "%d\n", r);
9011 
9012 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9013 }
9014 
9015 static ssize_t
9016 rb_simple_write(struct file *filp, const char __user *ubuf,
9017 		size_t cnt, loff_t *ppos)
9018 {
9019 	struct trace_array *tr = filp->private_data;
9020 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9021 	unsigned long val;
9022 	int ret;
9023 
9024 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9025 	if (ret)
9026 		return ret;
9027 
9028 	if (buffer) {
9029 		mutex_lock(&trace_types_lock);
9030 		if (!!val == tracer_tracing_is_on(tr)) {
9031 			val = 0; /* do nothing */
9032 		} else if (val) {
9033 			tracer_tracing_on(tr);
9034 			if (tr->current_trace->start)
9035 				tr->current_trace->start(tr);
9036 		} else {
9037 			tracer_tracing_off(tr);
9038 			if (tr->current_trace->stop)
9039 				tr->current_trace->stop(tr);
9040 			/* Wake up any waiters */
9041 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9042 		}
9043 		mutex_unlock(&trace_types_lock);
9044 	}
9045 
9046 	(*ppos)++;
9047 
9048 	return cnt;
9049 }
9050 
9051 static const struct file_operations rb_simple_fops = {
9052 	.open		= tracing_open_generic_tr,
9053 	.read		= rb_simple_read,
9054 	.write		= rb_simple_write,
9055 	.release	= tracing_release_generic_tr,
9056 	.llseek		= default_llseek,
9057 };
9058 
9059 static ssize_t
9060 buffer_percent_read(struct file *filp, char __user *ubuf,
9061 		    size_t cnt, loff_t *ppos)
9062 {
9063 	struct trace_array *tr = filp->private_data;
9064 	char buf[64];
9065 	int r;
9066 
9067 	r = tr->buffer_percent;
9068 	r = sprintf(buf, "%d\n", r);
9069 
9070 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9071 }
9072 
9073 static ssize_t
9074 buffer_percent_write(struct file *filp, const char __user *ubuf,
9075 		     size_t cnt, loff_t *ppos)
9076 {
9077 	struct trace_array *tr = filp->private_data;
9078 	unsigned long val;
9079 	int ret;
9080 
9081 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9082 	if (ret)
9083 		return ret;
9084 
9085 	if (val > 100)
9086 		return -EINVAL;
9087 
9088 	tr->buffer_percent = val;
9089 
9090 	(*ppos)++;
9091 
9092 	return cnt;
9093 }
9094 
9095 static const struct file_operations buffer_percent_fops = {
9096 	.open		= tracing_open_generic_tr,
9097 	.read		= buffer_percent_read,
9098 	.write		= buffer_percent_write,
9099 	.release	= tracing_release_generic_tr,
9100 	.llseek		= default_llseek,
9101 };
9102 
9103 static ssize_t
9104 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9105 {
9106 	struct trace_array *tr = filp->private_data;
9107 	size_t size;
9108 	char buf[64];
9109 	int order;
9110 	int r;
9111 
9112 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9113 	size = (PAGE_SIZE << order) / 1024;
9114 
9115 	r = sprintf(buf, "%zd\n", size);
9116 
9117 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9118 }
9119 
9120 static ssize_t
9121 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9122 			 size_t cnt, loff_t *ppos)
9123 {
9124 	struct trace_array *tr = filp->private_data;
9125 	unsigned long val;
9126 	int old_order;
9127 	int order;
9128 	int pages;
9129 	int ret;
9130 
9131 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9132 	if (ret)
9133 		return ret;
9134 
9135 	val *= 1024; /* value passed in is in KB */
9136 
9137 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9138 	order = fls(pages - 1);
9139 
9140 	/* limit between 1 and 128 system pages */
9141 	if (order < 0 || order > 7)
9142 		return -EINVAL;
9143 
9144 	/* Do not allow tracing while changing the order of the ring buffer */
9145 	tracing_stop_tr(tr);
9146 
9147 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9148 	if (old_order == order)
9149 		goto out;
9150 
9151 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9152 	if (ret)
9153 		goto out;
9154 
9155 #ifdef CONFIG_TRACER_MAX_TRACE
9156 
9157 	if (!tr->allocated_snapshot)
9158 		goto out_max;
9159 
9160 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9161 	if (ret) {
9162 		/* Put back the old order */
9163 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9164 		if (WARN_ON_ONCE(cnt)) {
9165 			/*
9166 			 * AARGH! We are left with different orders!
9167 			 * The max buffer is our "snapshot" buffer.
9168 			 * When a tracer needs a snapshot (one of the
9169 			 * latency tracers), it swaps the max buffer
9170 			 * with the saved snapshot. We succeeded in updating
9171 			 * the order of the main buffer, but failed to update
9172 			 * the order of the max buffer. And when we tried to
9173 			 * reset the main buffer to its original order, we
9174 			 * failed there too. This is very unlikely to
9175 			 * happen, but if it does, warn and kill all
9176 			 * tracing.
9177 			 */
9178 			tracing_disabled = 1;
9179 		}
9180 		goto out;
9181 	}
9182  out_max:
9183 #endif
9184 	(*ppos)++;
9185  out:
9186 	if (ret)
9187 		cnt = ret;
9188 	tracing_start_tr(tr);
9189 	return cnt;
9190 }
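
/*
 * Worked example of the KB-to-order conversion above (a sketch assuming a
 * 4K PAGE_SIZE; other page sizes scale accordingly): writing "64" to
 * buffer_subbuf_size_kb gives val = 64 * 1024 = 65536, so
 * pages = DIV_ROUND_UP(65536, 4096) = 16 and order = fls(15) = 4, i.e. a
 * sub-buffer of 2^4 = 16 system pages (64K).  Writing "3" rounds up the
 * same way: pages = 1, order = fls(0) = 0, giving a single 4K page.
 */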
9191 
9192 static const struct file_operations buffer_subbuf_size_fops = {
9193 	.open		= tracing_open_generic_tr,
9194 	.read		= buffer_subbuf_size_read,
9195 	.write		= buffer_subbuf_size_write,
9196 	.release	= tracing_release_generic_tr,
9197 	.llseek		= default_llseek,
9198 };
9199 
9200 static struct dentry *trace_instance_dir;
9201 
9202 static void
9203 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9204 
9205 static int
9206 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9207 {
9208 	enum ring_buffer_flags rb_flags;
9209 
9210 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9211 
9212 	buf->tr = tr;
9213 
9214 	if (tr->range_addr_start && tr->range_addr_size) {
9215 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9216 						      tr->range_addr_start,
9217 						      tr->range_addr_size);
9218 
9219 		ring_buffer_last_boot_delta(buf->buffer,
9220 					    &tr->text_delta, &tr->data_delta);
9221 		/*
9222 		 * This is basically the same as a mapped buffer,
9223 		 * with the same restrictions.
9224 		 */
9225 		tr->mapped++;
9226 	} else {
9227 		buf->buffer = ring_buffer_alloc(size, rb_flags);
9228 	}
9229 	if (!buf->buffer)
9230 		return -ENOMEM;
9231 
9232 	buf->data = alloc_percpu(struct trace_array_cpu);
9233 	if (!buf->data) {
9234 		ring_buffer_free(buf->buffer);
9235 		buf->buffer = NULL;
9236 		return -ENOMEM;
9237 	}
9238 
9239 	/* Allocate the first page for all buffers */
9240 	set_buffer_entries(&tr->array_buffer,
9241 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9242 
9243 	return 0;
9244 }
9245 
9246 static void free_trace_buffer(struct array_buffer *buf)
9247 {
9248 	if (buf->buffer) {
9249 		ring_buffer_free(buf->buffer);
9250 		buf->buffer = NULL;
9251 		free_percpu(buf->data);
9252 		buf->data = NULL;
9253 	}
9254 }
9255 
9256 static int allocate_trace_buffers(struct trace_array *tr, int size)
9257 {
9258 	int ret;
9259 
9260 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9261 	if (ret)
9262 		return ret;
9263 
9264 #ifdef CONFIG_TRACER_MAX_TRACE
9265 	/* Fixed (memory mapped) buffer trace arrays do not have snapshot buffers */
9266 	if (tr->range_addr_start)
9267 		return 0;
9268 
9269 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9270 				    allocate_snapshot ? size : 1);
9271 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9272 		free_trace_buffer(&tr->array_buffer);
9273 		return -ENOMEM;
9274 	}
9275 	tr->allocated_snapshot = allocate_snapshot;
9276 
9277 	allocate_snapshot = false;
9278 #endif
9279 
9280 	return 0;
9281 }
9282 
9283 static void free_trace_buffers(struct trace_array *tr)
9284 {
9285 	if (!tr)
9286 		return;
9287 
9288 	free_trace_buffer(&tr->array_buffer);
9289 
9290 #ifdef CONFIG_TRACER_MAX_TRACE
9291 	free_trace_buffer(&tr->max_buffer);
9292 #endif
9293 }
9294 
9295 static void init_trace_flags_index(struct trace_array *tr)
9296 {
9297 	int i;
9298 
9299 	/* Used by the trace options files */
9300 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9301 		tr->trace_flags_index[i] = i;
9302 }
9303 
9304 static void __update_tracer_options(struct trace_array *tr)
9305 {
9306 	struct tracer *t;
9307 
9308 	for (t = trace_types; t; t = t->next)
9309 		add_tracer_options(tr, t);
9310 }
9311 
9312 static void update_tracer_options(struct trace_array *tr)
9313 {
9314 	mutex_lock(&trace_types_lock);
9315 	tracer_options_updated = true;
9316 	__update_tracer_options(tr);
9317 	mutex_unlock(&trace_types_lock);
9318 }
9319 
9320 /* Must have trace_types_lock held */
9321 struct trace_array *trace_array_find(const char *instance)
9322 {
9323 	struct trace_array *tr, *found = NULL;
9324 
9325 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9326 		if (tr->name && strcmp(tr->name, instance) == 0) {
9327 			found = tr;
9328 			break;
9329 		}
9330 	}
9331 
9332 	return found;
9333 }
9334 
9335 struct trace_array *trace_array_find_get(const char *instance)
9336 {
9337 	struct trace_array *tr;
9338 
9339 	mutex_lock(&trace_types_lock);
9340 	tr = trace_array_find(instance);
9341 	if (tr)
9342 		tr->ref++;
9343 	mutex_unlock(&trace_types_lock);
9344 
9345 	return tr;
9346 }
9347 
9348 static int trace_array_create_dir(struct trace_array *tr)
9349 {
9350 	int ret;
9351 
9352 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9353 	if (!tr->dir)
9354 		return -EINVAL;
9355 
9356 	ret = event_trace_add_tracer(tr->dir, tr);
9357 	if (ret) {
9358 		tracefs_remove(tr->dir);
9359 		return ret;
9360 	}
9361 
9362 	init_tracer_tracefs(tr, tr->dir);
9363 	__update_tracer_options(tr);
9364 
9365 	return ret;
9366 }
9367 
9368 static struct trace_array *
9369 trace_array_create_systems(const char *name, const char *systems,
9370 			   unsigned long range_addr_start,
9371 			   unsigned long range_addr_size)
9372 {
9373 	struct trace_array *tr;
9374 	int ret;
9375 
9376 	ret = -ENOMEM;
9377 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9378 	if (!tr)
9379 		return ERR_PTR(ret);
9380 
9381 	tr->name = kstrdup(name, GFP_KERNEL);
9382 	if (!tr->name)
9383 		goto out_free_tr;
9384 
9385 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9386 		goto out_free_tr;
9387 
9388 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9389 		goto out_free_tr;
9390 
9391 	if (systems) {
9392 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9393 		if (!tr->system_names)
9394 			goto out_free_tr;
9395 	}
9396 
9397 	/* Only for boot up memory mapped ring buffers */
9398 	tr->range_addr_start = range_addr_start;
9399 	tr->range_addr_size = range_addr_size;
9400 
9401 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9402 
9403 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9404 
9405 	raw_spin_lock_init(&tr->start_lock);
9406 
9407 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9408 #ifdef CONFIG_TRACER_MAX_TRACE
9409 	spin_lock_init(&tr->snapshot_trigger_lock);
9410 #endif
9411 	tr->current_trace = &nop_trace;
9412 
9413 	INIT_LIST_HEAD(&tr->systems);
9414 	INIT_LIST_HEAD(&tr->events);
9415 	INIT_LIST_HEAD(&tr->hist_vars);
9416 	INIT_LIST_HEAD(&tr->err_log);
9417 
9418 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9419 		goto out_free_tr;
9420 
9421 	/* The ring buffer is expanded by default */
9422 	trace_set_ring_buffer_expanded(tr);
9423 
9424 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9425 		goto out_free_tr;
9426 
9427 	ftrace_init_trace_array(tr);
9428 
9429 	init_trace_flags_index(tr);
9430 
9431 	if (trace_instance_dir) {
9432 		ret = trace_array_create_dir(tr);
9433 		if (ret)
9434 			goto out_free_tr;
9435 	} else
9436 		__trace_early_add_events(tr);
9437 
9438 	list_add(&tr->list, &ftrace_trace_arrays);
9439 
9440 	tr->ref++;
9441 
9442 	return tr;
9443 
9444  out_free_tr:
9445 	ftrace_free_ftrace_ops(tr);
9446 	free_trace_buffers(tr);
9447 	free_cpumask_var(tr->pipe_cpumask);
9448 	free_cpumask_var(tr->tracing_cpumask);
9449 	kfree_const(tr->system_names);
9450 	kfree(tr->name);
9451 	kfree(tr);
9452 
9453 	return ERR_PTR(ret);
9454 }
9455 
9456 static struct trace_array *trace_array_create(const char *name)
9457 {
9458 	return trace_array_create_systems(name, NULL, 0, 0);
9459 }
9460 
9461 static int instance_mkdir(const char *name)
9462 {
9463 	struct trace_array *tr;
9464 	int ret;
9465 
9466 	guard(mutex)(&event_mutex);
9467 	guard(mutex)(&trace_types_lock);
9468 
9469 	ret = -EEXIST;
9470 	if (trace_array_find(name))
9471 		return -EEXIST;
9472 
9473 	tr = trace_array_create(name);
9474 
9475 	ret = PTR_ERR_OR_ZERO(tr);
9476 
9477 	return ret;
9478 }
9479 
9480 static u64 map_pages(u64 start, u64 size)
9481 {
9482 	struct page **pages;
9483 	phys_addr_t page_start;
9484 	unsigned int page_count;
9485 	unsigned int i;
9486 	void *vaddr;
9487 
9488 	page_count = DIV_ROUND_UP(size, PAGE_SIZE);
9489 
9490 	page_start = start;
9491 	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
9492 	if (!pages)
9493 		return 0;
9494 
9495 	for (i = 0; i < page_count; i++) {
9496 		phys_addr_t addr = page_start + i * PAGE_SIZE;
9497 		pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
9498 	}
9499 	vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL);
9500 	kfree(pages);
9501 
9502 	return (u64)(unsigned long)vaddr;
9503 }
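
/*
 * Example of the mapping above (a sketch assuming a 4K PAGE_SIZE and a
 * made-up reserved region): a boot instance that reserved 1M of physical
 * memory at 0x80000000 is mapped as page_count = DIV_ROUND_UP(1M, 4K) = 256
 * struct page pointers, which vmap() stitches into one contiguous kernel
 * virtual range.  The returned virtual address is what enable_instances()
 * passes on as the instance's range_addr_start.
 */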
9504 
9505 /**
9506  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9507  * @name: The name of the trace array to be looked up/created.
9508  * @systems: A list of systems to create event directories for (NULL for all)
9509  *
9510  * Returns pointer to trace array with given name.
9511  * NULL, if it cannot be created.
9512  *
9513  * NOTE: This function increments the reference counter associated with the
9514  * trace array returned. This makes sure it cannot be freed while in use.
9515  * Use trace_array_put() once the trace array is no longer needed.
9516  * If the trace_array is to be freed, trace_array_destroy() needs to
9517  * be called after the trace_array_put(), or simply let user space delete
9518  * it from the tracefs instances directory. But until the
9519  * trace_array_put() is called, user space can not delete it.
9520  *
9521  */
9522 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9523 {
9524 	struct trace_array *tr;
9525 
9526 	guard(mutex)(&event_mutex);
9527 	guard(mutex)(&trace_types_lock);
9528 
9529 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9530 		if (tr->name && strcmp(tr->name, name) == 0) {
9531 			tr->ref++;
9532 			return tr;
9533 		}
9534 	}
9535 
9536 	tr = trace_array_create_systems(name, systems, 0, 0);
9537 
9538 	if (IS_ERR(tr))
9539 		tr = NULL;
9540 	else
9541 		tr->ref++;
9542 
9543 	return tr;
9544 }
9545 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
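
/*
 * Minimal usage sketch for the API above (hypothetical caller, e.g. a
 * module; the instance name is made up):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance", NULL);
 *	if (!tr)
 *		return -ENOMEM;
 *	// ... use the instance (enable events, record into it, ...) ...
 *	trace_array_put(tr);
 *	// Only if the instance should also be removed from tracefs:
 *	trace_array_destroy(tr);
 */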
9546 
9547 static int __remove_instance(struct trace_array *tr)
9548 {
9549 	int i;
9550 
9551 	/* Reference counter for a newly created trace array = 1. */
9552 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9553 		return -EBUSY;
9554 
9555 	list_del(&tr->list);
9556 
9557 	/* Disable all the flags that were enabled coming in */
9558 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9559 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9560 			set_tracer_flag(tr, 1 << i, 0);
9561 	}
9562 
9563 	if (printk_trace == tr)
9564 		update_printk_trace(&global_trace);
9565 
9566 	tracing_set_nop(tr);
9567 	clear_ftrace_function_probes(tr);
9568 	event_trace_del_tracer(tr);
9569 	ftrace_clear_pids(tr);
9570 	ftrace_destroy_function_files(tr);
9571 	tracefs_remove(tr->dir);
9572 	free_percpu(tr->last_func_repeats);
9573 	free_trace_buffers(tr);
9574 	clear_tracing_err_log(tr);
9575 
9576 	for (i = 0; i < tr->nr_topts; i++) {
9577 		kfree(tr->topts[i].topts);
9578 	}
9579 	kfree(tr->topts);
9580 
9581 	free_cpumask_var(tr->pipe_cpumask);
9582 	free_cpumask_var(tr->tracing_cpumask);
9583 	kfree_const(tr->system_names);
9584 	kfree(tr->name);
9585 	kfree(tr);
9586 
9587 	return 0;
9588 }
9589 
9590 int trace_array_destroy(struct trace_array *this_tr)
9591 {
9592 	struct trace_array *tr;
9593 
9594 	if (!this_tr)
9595 		return -EINVAL;
9596 
9597 	guard(mutex)(&event_mutex);
9598 	guard(mutex)(&trace_types_lock);
9599 
9600 
9601 	/* Making sure trace array exists before destroying it. */
9602 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9603 		if (tr == this_tr)
9604 			return __remove_instance(tr);
9605 	}
9606 
9607 	return -ENODEV;
9608 }
9609 EXPORT_SYMBOL_GPL(trace_array_destroy);
9610 
9611 static int instance_rmdir(const char *name)
9612 {
9613 	struct trace_array *tr;
9614 
9615 	guard(mutex)(&event_mutex);
9616 	guard(mutex)(&trace_types_lock);
9617 
9618 	tr = trace_array_find(name);
9619 	if (!tr)
9620 		return -ENODEV;
9621 
9622 	return __remove_instance(tr);
9623 }
9624 
9625 static __init void create_trace_instances(struct dentry *d_tracer)
9626 {
9627 	struct trace_array *tr;
9628 
9629 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9630 							 instance_mkdir,
9631 							 instance_rmdir);
9632 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9633 		return;
9634 
9635 	guard(mutex)(&event_mutex);
9636 	guard(mutex)(&trace_types_lock);
9637 
9638 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9639 		if (!tr->name)
9640 			continue;
9641 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9642 			     "Failed to create instance directory\n"))
9643 			return;
9644 	}
9645 }
9646 
9647 static void
9648 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9649 {
9650 	int cpu;
9651 
9652 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9653 			tr, &show_traces_fops);
9654 
9655 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9656 			tr, &set_tracer_fops);
9657 
9658 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9659 			  tr, &tracing_cpumask_fops);
9660 
9661 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9662 			  tr, &tracing_iter_fops);
9663 
9664 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9665 			  tr, &tracing_fops);
9666 
9667 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9668 			  tr, &tracing_pipe_fops);
9669 
9670 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9671 			  tr, &tracing_entries_fops);
9672 
9673 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9674 			  tr, &tracing_total_entries_fops);
9675 
9676 	trace_create_file("free_buffer", 0200, d_tracer,
9677 			  tr, &tracing_free_buffer_fops);
9678 
9679 	trace_create_file("trace_marker", 0220, d_tracer,
9680 			  tr, &tracing_mark_fops);
9681 
9682 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9683 
9684 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9685 			  tr, &tracing_mark_raw_fops);
9686 
9687 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9688 			  &trace_clock_fops);
9689 
9690 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9691 			  tr, &rb_simple_fops);
9692 
9693 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9694 			  &trace_time_stamp_mode_fops);
9695 
9696 	tr->buffer_percent = 50;
9697 
9698 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9699 			tr, &buffer_percent_fops);
9700 
9701 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9702 			  tr, &buffer_subbuf_size_fops);
9703 
9704 	create_trace_options_dir(tr);
9705 
9706 #ifdef CONFIG_TRACER_MAX_TRACE
9707 	trace_create_maxlat_file(tr, d_tracer);
9708 #endif
9709 
9710 	if (ftrace_create_function_files(tr, d_tracer))
9711 		MEM_FAIL(1, "Could not allocate function filter files");
9712 
9713 	if (tr->range_addr_start) {
9714 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
9715 				  tr, &last_boot_fops);
9716 #ifdef CONFIG_TRACER_SNAPSHOT
9717 	} else {
9718 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9719 				  tr, &snapshot_fops);
9720 #endif
9721 	}
9722 
9723 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9724 			  tr, &tracing_err_log_fops);
9725 
9726 	for_each_tracing_cpu(cpu)
9727 		tracing_init_tracefs_percpu(tr, cpu);
9728 
9729 	ftrace_init_tracefs(tr, d_tracer);
9730 }
9731 
9732 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9733 {
9734 	struct vfsmount *mnt;
9735 	struct file_system_type *type;
9736 
9737 	/*
9738 	 * To maintain backward compatibility for tools that mount
9739 	 * debugfs to get to the tracing facility, tracefs is automatically
9740 	 * mounted to the debugfs/tracing directory.
9741 	 */
9742 	type = get_fs_type("tracefs");
9743 	if (!type)
9744 		return NULL;
9745 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9746 	put_filesystem(type);
9747 	if (IS_ERR(mnt))
9748 		return NULL;
9749 	mntget(mnt);
9750 
9751 	return mnt;
9752 }
9753 
9754 /**
9755  * tracing_init_dentry - initialize top level trace array
9756  *
9757  * This is called when creating files or directories in the tracing
9758  * directory. It is called via fs_initcall() by any of the boot up code
9759  * and expects to return the dentry of the top level tracing directory.
9760  */
9761 int tracing_init_dentry(void)
9762 {
9763 	struct trace_array *tr = &global_trace;
9764 
9765 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9766 		pr_warn("Tracing disabled due to lockdown\n");
9767 		return -EPERM;
9768 	}
9769 
9770 	/* The top level trace array uses NULL as the parent */
9771 	if (tr->dir)
9772 		return 0;
9773 
9774 	if (WARN_ON(!tracefs_initialized()))
9775 		return -ENODEV;
9776 
9777 	/*
9778 	 * As there may still be users that expect the tracing
9779 	 * files to exist in debugfs/tracing, we must automount
9780 	 * the tracefs file system there, so older tools still
9781 	 * work with the newer kernel.
9782 	 */
9783 	tr->dir = debugfs_create_automount("tracing", NULL,
9784 					   trace_automount, NULL);
9785 
9786 	return 0;
9787 }
9788 
9789 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9790 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9791 
9792 static struct workqueue_struct *eval_map_wq __initdata;
9793 static struct work_struct eval_map_work __initdata;
9794 static struct work_struct tracerfs_init_work __initdata;
9795 
9796 static void __init eval_map_work_func(struct work_struct *work)
9797 {
9798 	int len;
9799 
9800 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9801 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9802 }
9803 
9804 static int __init trace_eval_init(void)
9805 {
9806 	INIT_WORK(&eval_map_work, eval_map_work_func);
9807 
9808 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9809 	if (!eval_map_wq) {
9810 		pr_err("Unable to allocate eval_map_wq\n");
9811 		/* Do work here */
9812 		eval_map_work_func(&eval_map_work);
9813 		return -ENOMEM;
9814 	}
9815 
9816 	queue_work(eval_map_wq, &eval_map_work);
9817 	return 0;
9818 }
9819 
9820 subsys_initcall(trace_eval_init);
9821 
9822 static int __init trace_eval_sync(void)
9823 {
9824 	/* Make sure the eval map updates are finished */
9825 	if (eval_map_wq)
9826 		destroy_workqueue(eval_map_wq);
9827 	return 0;
9828 }
9829 
9830 late_initcall_sync(trace_eval_sync);
9831 
9832 
9833 #ifdef CONFIG_MODULES
9834 static void trace_module_add_evals(struct module *mod)
9835 {
9836 	if (!mod->num_trace_evals)
9837 		return;
9838 
9839 	/*
9840 	 * Modules with bad taint do not have events created, do
9841 	 * not bother with enums either.
9842 	 */
9843 	if (trace_module_has_bad_taint(mod))
9844 		return;
9845 
9846 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9847 }
9848 
9849 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9850 static void trace_module_remove_evals(struct module *mod)
9851 {
9852 	union trace_eval_map_item *map;
9853 	union trace_eval_map_item **last = &trace_eval_maps;
9854 
9855 	if (!mod->num_trace_evals)
9856 		return;
9857 
9858 	guard(mutex)(&trace_eval_mutex);
9859 
9860 	map = trace_eval_maps;
9861 
9862 	while (map) {
9863 		if (map->head.mod == mod)
9864 			break;
9865 		map = trace_eval_jmp_to_tail(map);
9866 		last = &map->tail.next;
9867 		map = map->tail.next;
9868 	}
9869 	if (!map)
9870 		return;
9871 
9872 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9873 	kfree(map);
9874 }
9875 #else
9876 static inline void trace_module_remove_evals(struct module *mod) { }
9877 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9878 
9879 static int trace_module_notify(struct notifier_block *self,
9880 			       unsigned long val, void *data)
9881 {
9882 	struct module *mod = data;
9883 
9884 	switch (val) {
9885 	case MODULE_STATE_COMING:
9886 		trace_module_add_evals(mod);
9887 		break;
9888 	case MODULE_STATE_GOING:
9889 		trace_module_remove_evals(mod);
9890 		break;
9891 	}
9892 
9893 	return NOTIFY_OK;
9894 }
9895 
9896 static struct notifier_block trace_module_nb = {
9897 	.notifier_call = trace_module_notify,
9898 	.priority = 0,
9899 };
9900 #endif /* CONFIG_MODULES */
9901 
9902 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9903 {
9904 
9905 	event_trace_init();
9906 
9907 	init_tracer_tracefs(&global_trace, NULL);
9908 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9909 
9910 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9911 			&global_trace, &tracing_thresh_fops);
9912 
9913 	trace_create_file("README", TRACE_MODE_READ, NULL,
9914 			NULL, &tracing_readme_fops);
9915 
9916 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9917 			NULL, &tracing_saved_cmdlines_fops);
9918 
9919 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9920 			  NULL, &tracing_saved_cmdlines_size_fops);
9921 
9922 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9923 			NULL, &tracing_saved_tgids_fops);
9924 
9925 	trace_create_eval_file(NULL);
9926 
9927 #ifdef CONFIG_MODULES
9928 	register_module_notifier(&trace_module_nb);
9929 #endif
9930 
9931 #ifdef CONFIG_DYNAMIC_FTRACE
9932 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9933 			NULL, &tracing_dyn_info_fops);
9934 #endif
9935 
9936 	create_trace_instances(NULL);
9937 
9938 	update_tracer_options(&global_trace);
9939 }
9940 
9941 static __init int tracer_init_tracefs(void)
9942 {
9943 	int ret;
9944 
9945 	trace_access_lock_init();
9946 
9947 	ret = tracing_init_dentry();
9948 	if (ret)
9949 		return 0;
9950 
9951 	if (eval_map_wq) {
9952 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9953 		queue_work(eval_map_wq, &tracerfs_init_work);
9954 	} else {
9955 		tracer_init_tracefs_work_func(NULL);
9956 	}
9957 
9958 	rv_init_interface();
9959 
9960 	return 0;
9961 }
9962 
9963 fs_initcall(tracer_init_tracefs);
9964 
9965 static int trace_die_panic_handler(struct notifier_block *self,
9966 				unsigned long ev, void *unused);
9967 
9968 static struct notifier_block trace_panic_notifier = {
9969 	.notifier_call = trace_die_panic_handler,
9970 	.priority = INT_MAX - 1,
9971 };
9972 
9973 static struct notifier_block trace_die_notifier = {
9974 	.notifier_call = trace_die_panic_handler,
9975 	.priority = INT_MAX - 1,
9976 };
9977 
9978 /*
9979  * The idea is to execute the following die/panic callback early, in order
9980  * to avoid showing irrelevant information in the trace (like other panic
9981  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
9982  * warnings get disabled (to prevent potential log flooding).
9983  */
9984 static int trace_die_panic_handler(struct notifier_block *self,
9985 				unsigned long ev, void *unused)
9986 {
9987 	bool ftrace_check = false;
9988 
9989 	trace_android_vh_ftrace_oops_enter(&ftrace_check);
9990 
9991 	if (!ftrace_dump_on_oops_enabled() || ftrace_check)
9992 		return NOTIFY_DONE;
9993 
9994 	/* The die notifier requires DIE_OOPS to trigger */
9995 	if (self == &trace_die_notifier && ev != DIE_OOPS)
9996 		return NOTIFY_DONE;
9997 
9998 	ftrace_dump(DUMP_PARAM);
9999 
10000 	trace_android_vh_ftrace_oops_exit(&ftrace_check);
10001 	return NOTIFY_DONE;
10002 }
10003 
10004 /*
10005  * printk is set to a max of 1024; we really don't need it that big.
10006  * Nothing should be printing 1000 characters anyway.
10007  */
10008 #define TRACE_MAX_PRINT		1000
10009 
10010 /*
10011  * Define here KERN_TRACE so that we have one place to modify
10012  * it if we decide to change what log level the ftrace dump
10013  * should be at.
10014  */
10015 #define KERN_TRACE		KERN_EMERG
10016 
10017 void
10018 trace_printk_seq(struct trace_seq *s)
10019 {
10020 	bool dump_printk = true;
10021 
10022 	/* Probably should print a warning here. */
10023 	if (s->seq.len >= TRACE_MAX_PRINT)
10024 		s->seq.len = TRACE_MAX_PRINT;
10025 
10026 	/*
10027 	 * More paranoid code. Although the buffer size is set to
10028 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10029 	 * an extra layer of protection.
10030 	 */
10031 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10032 		s->seq.len = s->seq.size - 1;
10033 
10034 	/* should be zero terminated, but we are paranoid. */
10035 	s->buffer[s->seq.len] = 0;
10036 
10037 	trace_android_vh_ftrace_dump_buffer(s, &dump_printk);
10038 	if (dump_printk)
10039 		printk(KERN_TRACE "%s", s->buffer);
10040 
10041 	trace_seq_init(s);
10042 }
10043 
10044 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10045 {
10046 	iter->tr = tr;
10047 	iter->trace = iter->tr->current_trace;
10048 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10049 	iter->array_buffer = &tr->array_buffer;
10050 
10051 	if (iter->trace && iter->trace->open)
10052 		iter->trace->open(iter);
10053 
10054 	/* Annotate start of buffers if we had overruns */
10055 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10056 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10057 
10058 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10059 	if (trace_clocks[iter->tr->clock_id].in_ns)
10060 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10061 
10062 	/* Can not use kmalloc for iter.temp and iter.fmt */
10063 	iter->temp = static_temp_buf;
10064 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10065 	iter->fmt = static_fmt_buf;
10066 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10067 }
10068 
10069 void trace_init_global_iter(struct trace_iterator *iter)
10070 {
10071 	trace_init_iter(iter, &global_trace);
10072 }
10073 
10074 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10075 {
10076 	/* use static because iter can be a bit big for the stack */
10077 	static struct trace_iterator iter;
10078 	unsigned int old_userobj;
10079 	unsigned long flags;
10080 	int cnt = 0, cpu;
10081 	bool ftrace_check = true;
10082 	bool ftrace_size_check = false;
10083 	unsigned long size;
10084 
10085 	/*
10086 	 * Always turn off tracing when we dump.
10087 	 * We don't need to show trace output of what happens
10088 	 * between multiple crashes.
10089 	 *
10090 	 * If the user does a sysrq-z, then they can re-enable
10091 	 * tracing with echo 1 > tracing_on.
10092 	 */
10093 	tracer_tracing_off(tr);
10094 
10095 	local_irq_save(flags);
10096 
10097 	/* Simulate the iterator */
10098 	trace_init_iter(&iter, tr);
10099 
10100 	for_each_tracing_cpu(cpu) {
10101 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10102 		size = ring_buffer_size(iter.array_buffer->buffer, cpu);
10103 		trace_android_vh_ftrace_size_check(size, &ftrace_size_check);
10104 	}
10105 
10106 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10107 
10108 	/* don't look at user memory in panic mode */
10109 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10110 
10111 	if (ftrace_size_check)
10112 		goto out_enable;
10113 
10114 	if (dump_mode == DUMP_ORIG)
10115 		iter.cpu_file = raw_smp_processor_id();
10116 	else
10117 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10118 
10119 	if (tr == &global_trace)
10120 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
10121 	else
10122 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10123 
10124 	/* Did function tracer already get disabled? */
10125 	if (ftrace_is_dead()) {
10126 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10127 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10128 	}
10129 
10130 	/*
10131 	 * Ftrace timestamps support two formats:
10132 	 * - ftrace_check = 1, latency format, start with 0 from a specific time.
10133 	 * - ftrace_check = 0, absolute time format, consistent with kernel time.
10134 	 * With this vendor hook, the format can be chosen to match different requirements.
10135 	 */
10136 	trace_android_vh_ftrace_format_check(&ftrace_check);
10137 
10138 	/*
10139 	 * We need to stop all tracing on all CPUs to read
10140 	 * the next buffer. This is a bit expensive, but is
10141 	 * not done often. We fill in all that we can read,
10142 	 * and then release the locks again.
10143 	 */
10144 
10145 	while (!trace_empty(&iter)) {
10146 		if (!cnt)
10147 			printk(KERN_TRACE "---------------------------------\n");
10148 
10149 		cnt++;
10150 
10151 		trace_iterator_reset(&iter);
10152 		if (ftrace_check)
10153 			iter.iter_flags |= TRACE_FILE_LAT_FMT;
10154 
10155 		if (trace_find_next_entry_inc(&iter) != NULL) {
10156 			int ret;
10157 
10158 			ret = print_trace_line(&iter);
10159 			if (ret != TRACE_TYPE_NO_CONSUME)
10160 				trace_consume(&iter);
10161 
10162 			trace_printk_seq(&iter.seq);
10163 		}
10164 		touch_nmi_watchdog();
10165 	}
10166 
10167 	if (!cnt)
10168 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10169 	else
10170 		printk(KERN_TRACE "---------------------------------\n");
10171 
10172 out_enable:
10173 	tr->trace_flags |= old_userobj;
10174 
10175 	for_each_tracing_cpu(cpu) {
10176 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10177 	}
10178 	local_irq_restore(flags);
10179 }
10180 
10181 static void ftrace_dump_by_param(void)
10182 {
10183 	bool first_param = true;
10184 	char dump_param[MAX_TRACER_SIZE];
10185 	char *buf, *token, *inst_name;
10186 	struct trace_array *tr;
10187 
10188 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10189 	buf = dump_param;
10190 
10191 	while ((token = strsep(&buf, ",")) != NULL) {
10192 		if (first_param) {
10193 			first_param = false;
10194 			if (!strcmp("0", token))
10195 				continue;
10196 			else if (!strcmp("1", token)) {
10197 				ftrace_dump_one(&global_trace, DUMP_ALL);
10198 				continue;
10199 			}
10200 			else if (!strcmp("2", token) ||
10201 			  !strcmp("orig_cpu", token)) {
10202 				ftrace_dump_one(&global_trace, DUMP_ORIG);
10203 				continue;
10204 			}
10205 		}
10206 
10207 		inst_name = strsep(&token, "=");
10208 		tr = trace_array_find(inst_name);
10209 		if (!tr) {
10210 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
10211 			continue;
10212 		}
10213 
10214 		if (token && (!strcmp("2", token) ||
10215 			  !strcmp("orig_cpu", token)))
10216 			ftrace_dump_one(tr, DUMP_ORIG);
10217 		else
10218 			ftrace_dump_one(tr, DUMP_ALL);
10219 	}
10220 }
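
/*
 * Example values of ftrace_dump_on_oops accepted by the parser above
 * (illustrative; the instance names are made up):
 *
 *	ftrace_dump_on_oops=1			dump the global buffer, all CPUs
 *	ftrace_dump_on_oops=orig_cpu		dump only the CPU that oopsed
 *	ftrace_dump_on_oops=1,foo,bar=orig_cpu	global buffer plus instance "foo"
 *						(all CPUs) and instance "bar"
 *						(oopsing CPU only)
 */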
10221 
10222 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10223 {
10224 	static atomic_t dump_running;
10225 
10226 	/* Only allow one dump user at a time. */
10227 	if (atomic_inc_return(&dump_running) != 1) {
10228 		atomic_dec(&dump_running);
10229 		return;
10230 	}
10231 
10232 	switch (oops_dump_mode) {
10233 	case DUMP_ALL:
10234 		ftrace_dump_one(&global_trace, DUMP_ALL);
10235 		break;
10236 	case DUMP_ORIG:
10237 		ftrace_dump_one(&global_trace, DUMP_ORIG);
10238 		break;
10239 	case DUMP_PARAM:
10240 		ftrace_dump_by_param();
10241 		break;
10242 	case DUMP_NONE:
10243 		break;
10244 	default:
10245 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10246 		ftrace_dump_one(&global_trace, DUMP_ALL);
10247 	}
10248 
10249 	atomic_dec(&dump_running);
10250 }
10251 EXPORT_SYMBOL_GPL(ftrace_dump);
10252 
10253 #define WRITE_BUFSIZE  4096
10254 
10255 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10256 				size_t count, loff_t *ppos,
10257 				int (*createfn)(const char *))
10258 {
10259 	char *kbuf, *buf, *tmp;
10260 	int ret = 0;
10261 	size_t done = 0;
10262 	size_t size;
10263 
10264 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10265 	if (!kbuf)
10266 		return -ENOMEM;
10267 
10268 	while (done < count) {
10269 		size = count - done;
10270 
10271 		if (size >= WRITE_BUFSIZE)
10272 			size = WRITE_BUFSIZE - 1;
10273 
10274 		if (copy_from_user(kbuf, buffer + done, size)) {
10275 			ret = -EFAULT;
10276 			goto out;
10277 		}
10278 		kbuf[size] = '\0';
10279 		buf = kbuf;
10280 		do {
10281 			tmp = strchr(buf, '\n');
10282 			if (tmp) {
10283 				*tmp = '\0';
10284 				size = tmp - buf + 1;
10285 			} else {
10286 				size = strlen(buf);
10287 				if (done + size < count) {
10288 					if (buf != kbuf)
10289 						break;
10290 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10291 					pr_warn("Line length is too long: Should be less than %d\n",
10292 						WRITE_BUFSIZE - 2);
10293 					ret = -EINVAL;
10294 					goto out;
10295 				}
10296 			}
10297 			done += size;
10298 
10299 			/* Remove comments */
10300 			tmp = strchr(buf, '#');
10301 
10302 			if (tmp)
10303 				*tmp = '\0';
10304 
10305 			ret = createfn(buf);
10306 			if (ret)
10307 				goto out;
10308 			buf += size;
10309 
10310 		} while (done < count);
10311 	}
10312 	ret = done;
10313 
10314 out:
10315 	kfree(kbuf);
10316 
10317 	return ret;
10318 }
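
/*
 * Sketch of how a dynamic-event file typically wires this in (hypothetical
 * helper names; real users include the kprobe/uprobe event interfaces):
 *
 *	static int my_create_fn(const char *raw_command)
 *	{
 *		// one definition per call; '#' comments and the trailing
 *		// newline have already been stripped by the caller
 *		return 0;
 *	}
 *
 *	static ssize_t my_write(struct file *file, const char __user *buf,
 *				size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buf, count, ppos,
 *					       my_create_fn);
 *	}
 */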
10319 
10320 #ifdef CONFIG_TRACER_MAX_TRACE
10321 __init static bool tr_needs_alloc_snapshot(const char *name)
10322 {
10323 	char *test;
10324 	int len = strlen(name);
10325 	bool ret;
10326 
10327 	if (!boot_snapshot_index)
10328 		return false;
10329 
10330 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10331 	    boot_snapshot_info[len] == '\t')
10332 		return true;
10333 
10334 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10335 	if (!test)
10336 		return false;
10337 
10338 	sprintf(test, "\t%s\t", name);
10339 	ret = strstr(boot_snapshot_info, test) == NULL;
10340 	kfree(test);
10341 	return ret;
10342 }
10343 
10344 __init static void do_allocate_snapshot(const char *name)
10345 {
10346 	if (!tr_needs_alloc_snapshot(name))
10347 		return;
10348 
10349 	/*
10350 	 * When allocate_snapshot is set, the next call to
10351 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10352 	 * will allocate the snapshot buffer. That will also clear
10353 	 * this flag.
10354 	 */
10355 	allocate_snapshot = true;
10356 }
10357 #else
10358 static inline void do_allocate_snapshot(const char *name) { }
10359 #endif
10360 
10361 __init static void enable_instances(void)
10362 {
10363 	struct trace_array *tr;
10364 	char *curr_str;
10365 	char *name;
10366 	char *str;
10367 	char *tok;
10368 
10369 	/* A tab is always appended */
10370 	boot_instance_info[boot_instance_index - 1] = '\0';
10371 	str = boot_instance_info;
10372 
10373 	while ((curr_str = strsep(&str, "\t"))) {
10374 		phys_addr_t start = 0;
10375 		phys_addr_t size = 0;
10376 		unsigned long addr = 0;
10377 		bool traceprintk = false;
10378 		bool traceoff = false;
10379 		char *flag_delim;
10380 		char *addr_delim;
10381 
10382 		tok = strsep(&curr_str, ",");
10383 
10384 		flag_delim = strchr(tok, '^');
10385 		addr_delim = strchr(tok, '@');
10386 
10387 		if (addr_delim)
10388 			*addr_delim++ = '\0';
10389 
10390 		if (flag_delim)
10391 			*flag_delim++ = '\0';
10392 
10393 		name = tok;
10394 
10395 		if (flag_delim) {
10396 			char *flag;
10397 
10398 			while ((flag = strsep(&flag_delim, "^"))) {
10399 				if (strcmp(flag, "traceoff") == 0) {
10400 					traceoff = true;
10401 				} else if ((strcmp(flag, "printk") == 0) ||
10402 					   (strcmp(flag, "traceprintk") == 0) ||
10403 					   (strcmp(flag, "trace_printk") == 0)) {
10404 					traceprintk = true;
10405 				} else {
10406 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10407 						flag, name);
10408 				}
10409 			}
10410 		}
10411 
10412 		tok = addr_delim;
10413 		if (tok && isdigit(*tok)) {
10414 			start = memparse(tok, &tok);
10415 			if (!start) {
10416 				pr_warn("Tracing: Invalid boot instance address for %s\n",
10417 					name);
10418 				continue;
10419 			}
10420 			if (*tok != ':') {
10421 				pr_warn("Tracing: No size specified for instance %s\n", name);
10422 				continue;
10423 			}
10424 			tok++;
10425 			size = memparse(tok, &tok);
10426 			if (!size) {
10427 				pr_warn("Tracing: Invalid boot instance size for %s\n",
10428 					name);
10429 				continue;
10430 			}
10431 		} else if (tok) {
10432 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
10433 				start = 0;
10434 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10435 				continue;
10436 			}
10437 		}
10438 
10439 		if (start) {
10440 			addr = map_pages(start, size);
10441 			if (addr) {
10442 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10443 					name, &start, (unsigned long)size);
10444 			} else {
10445 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
10446 				continue;
10447 			}
10448 		} else {
10449 			/* Only non-mapped buffers have snapshot buffers */
10450 			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10451 				do_allocate_snapshot(name);
10452 		}
10453 
10454 		tr = trace_array_create_systems(name, NULL, addr, size);
10455 		if (IS_ERR(tr)) {
10456 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10457 			continue;
10458 		}
10459 
10460 		if (traceoff)
10461 			tracer_tracing_off(tr);
10462 
10463 		if (traceprintk)
10464 			update_printk_trace(tr);
10465 
10466 		/*
10467 		 * If start is set, then this is a mapped buffer, and
10468 		 * cannot be deleted by user space, so keep the reference
10469 		 * to it.
10470 		 */
10471 		if (start) {
10472 			tr->flags |= TRACE_ARRAY_FL_BOOT;
10473 			tr->ref++;
10474 		}
10475 
10476 		while ((tok = strsep(&curr_str, ","))) {
10477 			early_enable_events(tr, tok, true);
10478 		}
10479 	}
10480 }
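
/*
 * Example trace_instance= boot strings handled by the parser above
 * (illustrative; names, addresses and sizes are made up):
 *
 *	trace_instance=foo				plain instance "foo"
 *	trace_instance=foo^traceoff^traceprintk		start with tracing off and
 *							redirect trace_printk() here
 *	trace_instance=boot_map@0x285400000:12M		back the buffer with a fixed
 *							physical range
 *	trace_instance=boot_map@mytracebuf,sched	back it with a reserve_mem
 *							region and enable "sched" events
 */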
10481 
10482 __init static int tracer_alloc_buffers(void)
10483 {
10484 	int ring_buf_size;
10485 	int ret = -ENOMEM;
10486 
10487 
10488 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10489 		pr_warn("Tracing disabled due to lockdown\n");
10490 		return -EPERM;
10491 	}
10492 
10493 	/*
10494 	 * Make sure we don't accidentally add more trace options
10495 	 * than we have bits for.
10496 	 */
10497 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10498 
10499 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10500 		return -ENOMEM;
10501 
10502 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10503 		goto out_free_buffer_mask;
10504 
10505 	/* Only allocate trace_printk buffers if a trace_printk exists */
10506 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10507 		/* Must be called before global_trace.buffer is allocated */
10508 		trace_printk_init_buffers();
10509 
10510 	/* To save memory, keep the ring buffer size to its minimum */
10511 	if (global_trace.ring_buffer_expanded)
10512 		ring_buf_size = trace_buf_size;
10513 	else
10514 		ring_buf_size = 1;
10515 
10516 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10517 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10518 
10519 	raw_spin_lock_init(&global_trace.start_lock);
10520 
	/*
	 * The prepare callbacks allocate some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
		goto out_free_savedcmd;

	/* TODO: make the number of buffers hot pluggable with CPUs */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_pipe_cpumask;
	}
	if (global_trace.buffer_disabled)
		tracing_off();

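	/*
	 * trace_boot_clock is set by the "trace_clock=" kernel command line
	 * parameter; apply it now that the top-level buffer exists.
	 */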
	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_MAX_TRACE
	spin_lock_init(&global_trace.snapshot_trigger_lock);
#endif
	ftrace_init_global_array_ops(&global_trace);

	init_trace_flags_index(&global_trace);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

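	/*
	 * Initialize the per-array lists of the top-level trace array and
	 * make it visible on the global list of trace arrays.
	 */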
	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

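	/* Error unwind: release resources in the reverse order of allocation. */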
out_free_pipe_cpumask:
	free_cpumask_var(global_trace.pipe_cpumask);
out_free_savedcmd:
	trace_free_saved_cmdlines_buffer();
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
	return ret;
}

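/*
 * If boot-time snapshotting was requested (snapshot_at_boot, typically set
 * by the "ftrace_boot_snapshot" kernel parameter), take a snapshot of every
 * instance that has a snapshot buffer allocated.
 */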
void __init ftrace_boot_snapshot(void)
{
#ifdef CONFIG_TRACER_MAX_TRACE
	struct trace_array *tr;

	if (!snapshot_at_boot)
		return;

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (!tr->allocated_snapshot)
			continue;

		tracing_snapshot_instance(tr);
		trace_array_puts(tr, "** Boot snapshot taken **\n");
	}
#endif
}

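/*
 * Called early in boot (from start_kernel()), before most subsystems are up,
 * so that tracing and trace_printk() become usable as soon as possible.
 */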
void __init early_trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter =
			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
		if (MEM_FAIL(!tracepoint_print_iter,
			     "Failed to allocate trace iterator\n"))
			tracepoint_printk = 0;
		else
			static_key_enable(&tracepoint_printk_key.key);
	}
	tracer_alloc_buffers();

	init_events();
}

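/*
 * Called from start_kernel() later in boot (after early_trace_init()) to
 * initialize trace events and any instances requested on the command line.
 */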
void __init trace_init(void)
{
	trace_event_init();

	if (boot_instance_index)
		enable_instances();
}

__init static void clear_boot_tracer(void)
{
	/*
	 * The default boot-up tracer name points into an init section
	 * that is freed after boot. This function is called at
	 * late_initcall time: if the boot tracer was never registered,
	 * clear the pointer so that a later registration does not
	 * access the buffer that is about to be freed.
	 */
	if (!default_bootup_tracer)
		return;

	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
	       default_bootup_tracer);
	default_bootup_tracer = NULL;
}

#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
__init static void tracing_set_default_clock(void)
{
	/* sched_clock_stable() is determined in late_initcall */
	if (!trace_boot_clock && !sched_clock_stable()) {
		if (security_locked_down(LOCKDOWN_TRACEFS)) {
			pr_warn("Can not set tracing clock due to lockdown\n");
			return;
		}

		printk(KERN_WARNING
		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
		       "If you want to keep using the local clock, then add:\n"
		       "  \"trace_clock=local\"\n"
		       "on the kernel command line\n");
		tracing_set_clock(&global_trace, "global");
	}
}
#else
static inline void tracing_set_default_clock(void) { }
#endif

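/*
 * Final tracing setup at late_initcall_sync() time: stop routing tracepoints
 * to printk if "tp_printk_stop_on_boot" was given, pick the default trace
 * clock, and drop the stale boot tracer name if it was never registered.
 */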
__init static int late_trace_init(void)
{
	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
		static_key_disable(&tracepoint_printk_key.key);
		tracepoint_printk = 0;
	}

	tracing_set_default_clock();
	clear_boot_tracer();
	return 0;
}

late_initcall_sync(late_trace_init);