// SPDX-License-Identifier: GPL-2.0
/*
 * ring buffer based function tracer
 *
 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 *
 * Originally taken from the RT patch by:
 *    Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Based on code from the latency_tracer, that is:
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 Nadia Yvette Chambers
 */
#include <linux/ring_buffer.h>
#include <linux/utsname.h>
#include <linux/stacktrace.h>
#include <linux/writeback.h>
#include <linux/kallsyms.h>
#include <linux/security.h>
#include <linux/seq_file.h>
#include <linux/irqflags.h>
#include <linux/debugfs.h>
#include <linux/tracefs.h>
#include <linux/pagemap.h>
#include <linux/hardirq.h>
#include <linux/linkage.h>
#include <linux/uaccess.h>
#include <linux/cleanup.h>
#include <linux/vmalloc.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/splice.h>
#include <linux/kdebug.h>
#include <linux/string.h>
#include <linux/mount.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/panic_notifier.h>
#include <linux/poll.h>
#include <linux/nmi.h>
#include <linux/fs.h>
#include <linux/trace.h>
#include <linux/sched/clock.h>
#include <linux/sched/rt.h>
#include <linux/fsnotify.h>
#include <linux/irq_work.h>
#include <linux/workqueue.h>
#include <trace/hooks/ftrace_dump.h>

#include <asm/setup.h> /* COMMAND_LINE_SIZE */

#include "trace.h"
#include "trace_output.h"

#ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * We need to change this state when a selftest is running.
 * A selftest will look into the ring buffer to count the
 * entries inserted during the selftest, although concurrent
 * insertions into the ring buffer such as trace_printk could occur
 * at the same time, giving false positive or negative results.
 */
static bool __read_mostly tracing_selftest_running;

/*
 * If boot-time tracing including tracers/events via kernel cmdline
 * is running, we do not want to run SELFTEST.
 */
bool __read_mostly tracing_selftest_disabled;

void __init disable_tracing_selftest(const char *reason)
{
	if (!tracing_selftest_disabled) {
		tracing_selftest_disabled = true;
		pr_info("Ftrace startup test is disabled due to %s\n", reason);
	}
}
#else
#define tracing_selftest_running	0
#define tracing_selftest_disabled	0
#endif

/* Pipe tracepoints to printk */
static struct trace_iterator *tracepoint_print_iter;
int tracepoint_printk;
static bool tracepoint_printk_stop_on_boot __initdata;
static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);

/* For tracers that don't implement custom flags */
static struct tracer_opt dummy_tracer_opt[] = {
	{ }
};

static int
dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
	return 0;
}

/*
 * To prevent the comm cache from being overwritten when no
 * tracing is active, only save the comm when a trace event
 * occurred.
 */
DEFINE_PER_CPU(bool, trace_taskinfo_save);

/*
 * Kill all tracing for good (never come back).
 * It is initialized to 1 but will turn to zero if the initialization
 * of the tracer is successful. But that is the only place that sets
 * this back to zero.
 */
static int tracing_disabled = 1;

cpumask_var_t __read_mostly tracing_buffer_mask;

/*
 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 *
 * If there is an oops (or kernel panic) and ftrace_dump_on_oops
 * is set, then ftrace_dump is called. This will output the contents
 * of the ftrace buffers to the console. This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
 *
 * It is off by default, but you can enable it by either specifying
 * "ftrace_dump_on_oops" on the kernel command line, or by setting
 * /proc/sys/kernel/ftrace_dump_on_oops
 * Set 1 if you want to dump the buffers of all CPUs
 * Set 2 if you want to dump the buffer of the CPU that triggered the oops
 * Set an instance name if you want to dump a specific trace instance
 * Dumping multiple instances is also supported; instances are separated
 * by commas.
 */
/* Set to the string "0" to disable by default */
char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";

/* When set, tracing will stop when a WARN*() is hit */
int __disable_trace_on_warning;

#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/* Map of enums to their values, for "eval_map" file */
struct trace_eval_map_head {
	struct module *mod;
	unsigned long length;
};

union trace_eval_map_item;

struct trace_eval_map_tail {
	/*
	 * "end" is first and points to NULL as it must be different
	 * from "mod" or "eval_string"
	 */
	union trace_eval_map_item *next;
	const char *end;	/* points to NULL */
};

static DEFINE_MUTEX(trace_eval_mutex);

/*
 * The trace_eval_maps are saved in an array with two extra elements,
 * one at the beginning, and one at the end. The beginning item contains
 * the count of the saved maps (head.length), and the module they
 * belong to if not built in (head.mod). The ending item contains a
 * pointer to the next array of saved eval_map items.
 */
union trace_eval_map_item {
	struct trace_eval_map		map;
	struct trace_eval_map_head	head;
	struct trace_eval_map_tail	tail;
};
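
/*
 * Conceptual layout of one saved array, based on the description above
 * (N = head.length; this is a sketch, not a literal memory dump):
 *
 *   [ head ] [ map 0 ] [ map 1 ] ... [ map N-1 ] [ tail ]
 *    .mod                                         .next --> head of the
 *    .length = N                                            next saved array
 */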

static union trace_eval_map_item *trace_eval_maps;
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */

int tracing_set_tracer(struct trace_array *tr, const char *buf);
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx);

static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
static char *default_bootup_tracer;

static bool allocate_snapshot;
static bool snapshot_at_boot;

static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
static int boot_instance_index;

static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
static int boot_snapshot_index;

static int __init set_cmdline_ftrace(char *str)
{
	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
	default_bootup_tracer = bootup_tracer_buf;
	/* We are using ftrace early, expand it */
	trace_set_ring_buffer_expanded(NULL);
	return 1;
}
__setup("ftrace=", set_cmdline_ftrace);

int ftrace_dump_on_oops_enabled(void)
{
	if (!strcmp("0", ftrace_dump_on_oops))
		return 0;
	else
		return 1;
}

static int __init set_ftrace_dump_on_oops(char *str)
{
	if (!*str) {
		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
		return 1;
	}

	if (*str == ',') {
		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
		return 1;
	}

	if (*str++ == '=') {
		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
		return 1;
	}

	return 0;
}
__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
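
/*
 * Illustrative command-line examples for the parser above ("foo" and "bar"
 * are made-up instance names; the resulting ftrace_dump_on_oops string is
 * shown on the right):
 *
 *	ftrace_dump_on_oops			-> "1"       (dump all CPUs)
 *	ftrace_dump_on_oops=2			-> "2"       (dump only the oopsing CPU)
 *	ftrace_dump_on_oops=foo,bar		-> "foo,bar" (dump those instances)
 *	ftrace_dump_on_oops,foo			-> "1,foo"   (all CPUs plus instance "foo")
 */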

static int __init stop_trace_on_warning(char *str)
{
	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
		__disable_trace_on_warning = 1;
	return 1;
}
__setup("traceoff_on_warning", stop_trace_on_warning);

static int __init boot_alloc_snapshot(char *str)
{
	char *slot = boot_snapshot_info + boot_snapshot_index;
	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
	int ret;

	if (str[0] == '=') {
		str++;
		if (strlen(str) >= left)
			return -1;

		ret = snprintf(slot, left, "%s\t", str);
		boot_snapshot_index += ret;
	} else {
		allocate_snapshot = true;
		/* We also need the main ring buffer expanded */
		trace_set_ring_buffer_expanded(NULL);
	}
	return 1;
}
__setup("alloc_snapshot", boot_alloc_snapshot);


static int __init boot_snapshot(char *str)
{
	snapshot_at_boot = true;
	boot_alloc_snapshot(str);
	return 1;
}
__setup("ftrace_boot_snapshot", boot_snapshot);


static int __init boot_instance(char *str)
{
	char *slot = boot_instance_info + boot_instance_index;
	int left = sizeof(boot_instance_info) - boot_instance_index;
	int ret;

	if (strlen(str) >= left)
		return -1;

	ret = snprintf(slot, left, "%s\t", str);
	boot_instance_index += ret;

	return 1;
}
__setup("trace_instance=", boot_instance);
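
/*
 * For example (illustrative), booting with:
 *
 *	trace_instance=foo
 *
 * records "foo" in boot_instance_info above so that a trace instance named
 * "foo" can be created later during tracing initialization.
 */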


static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;

static int __init set_trace_boot_options(char *str)
{
	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
	return 1;
}
__setup("trace_options=", set_trace_boot_options);

static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
static char *trace_boot_clock __initdata;

static int __init set_trace_boot_clock(char *str)
{
	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
	trace_boot_clock = trace_boot_clock_buf;
	return 1;
}
__setup("trace_clock=", set_trace_boot_clock);

static int __init set_tracepoint_printk(char *str)
{
	/* Ignore the "tp_printk_stop_on_boot" param */
	if (*str == '_')
		return 0;

	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
		tracepoint_printk = 1;
	return 1;
}
__setup("tp_printk", set_tracepoint_printk);

static int __init set_tracepoint_printk_stop(char *str)
{
	tracepoint_printk_stop_on_boot = true;
	return 1;
}
__setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);

unsigned long long ns2usecs(u64 nsec)
{
	nsec += 500;
	do_div(nsec, 1000);
	return nsec;
}

static void
trace_process_export(struct trace_export *export,
		     struct ring_buffer_event *event, int flag)
{
	struct trace_entry *entry;
	unsigned int size = 0;

	if (export->flags & flag) {
		entry = ring_buffer_event_data(event);
		size = ring_buffer_event_length(event);
		export->write(export, entry, size);
	}
}

static DEFINE_MUTEX(ftrace_export_lock);

static struct trace_export __rcu *ftrace_exports_list __read_mostly;

static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);

static inline void ftrace_exports_enable(struct trace_export *export)
{
	if (export->flags & TRACE_EXPORT_FUNCTION)
		static_branch_inc(&trace_function_exports_enabled);

	if (export->flags & TRACE_EXPORT_EVENT)
		static_branch_inc(&trace_event_exports_enabled);

	if (export->flags & TRACE_EXPORT_MARKER)
		static_branch_inc(&trace_marker_exports_enabled);
}

static inline void ftrace_exports_disable(struct trace_export *export)
{
	if (export->flags & TRACE_EXPORT_FUNCTION)
		static_branch_dec(&trace_function_exports_enabled);

	if (export->flags & TRACE_EXPORT_EVENT)
		static_branch_dec(&trace_event_exports_enabled);

	if (export->flags & TRACE_EXPORT_MARKER)
		static_branch_dec(&trace_marker_exports_enabled);
}

static void ftrace_exports(struct ring_buffer_event *event, int flag)
{
	struct trace_export *export;

	preempt_disable_notrace();

	export = rcu_dereference_raw_check(ftrace_exports_list);
	while (export) {
		trace_process_export(export, event, flag);
		export = rcu_dereference_raw_check(export->next);
	}

	preempt_enable_notrace();
}

static inline void
add_trace_export(struct trace_export **list, struct trace_export *export)
{
	rcu_assign_pointer(export->next, *list);
	/*
	 * We are entering export into the list but another
	 * CPU might be walking that list. We need to make sure
	 * the export->next pointer is valid before another CPU sees
	 * the export pointer included in the list.
	 */
	rcu_assign_pointer(*list, export);
}

static inline int
rm_trace_export(struct trace_export **list, struct trace_export *export)
{
	struct trace_export **p;

	for (p = list; *p != NULL; p = &(*p)->next)
		if (*p == export)
			break;

	if (*p != export)
		return -1;

	rcu_assign_pointer(*p, (*p)->next);

	return 0;
}

static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);

	add_trace_export(list, export);
}

static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret;

	ret = rm_trace_export(list, export);
	ftrace_exports_disable(export);

	return ret;
}

int register_ftrace_export(struct trace_export *export)
{
	if (WARN_ON_ONCE(!export->write))
		return -1;

	mutex_lock(&ftrace_export_lock);

	add_ftrace_export(&ftrace_exports_list, export);

	mutex_unlock(&ftrace_export_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(register_ftrace_export);

int unregister_ftrace_export(struct trace_export *export)
{
	int ret;

	mutex_lock(&ftrace_export_lock);

	ret = rm_ftrace_export(&ftrace_exports_list, export);

	mutex_unlock(&ftrace_export_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(unregister_ftrace_export);
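
/*
 * Illustrative sketch (not part of this file's logic): a module that wants
 * to mirror function trace events into its own sink could do roughly the
 * following, where my_sink_write() is a made-up callback name (see struct
 * trace_export in include/linux/trace.h for the exact callback signature):
 *
 *	static void my_sink_write(struct trace_export *export,
 *				  const void *entry, unsigned int len)
 *	{
 *		// push 'len' bytes of the raw trace entry somewhere
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_sink_write,
 *		.flags	= TRACE_EXPORT_FUNCTION,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */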

/* trace_flags holds trace_options default values */
#define TRACE_DEFAULT_FLAGS \
	(FUNCTION_DEFAULT_FLAGS | \
	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
	 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK)

/* trace_options that are only supported by global_trace */
#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
	TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)

/* trace_flags that are default zero for instances */
#define ZEROED_TRACE_FLAGS \
	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK)

/*
 * The global_trace is the descriptor that holds the top-level tracing
 * buffers for the live tracing.
 */
static struct trace_array global_trace = {
	.trace_flags = TRACE_DEFAULT_FLAGS,
};

static struct trace_array *printk_trace = &global_trace;

static __always_inline bool printk_binsafe(struct trace_array *tr)
{
	/*
	 * The binary format of trace_printk can cause a crash if used
	 * by a buffer from another boot. Force the use of the
	 * non-binary version of trace_printk if the trace_printk
	 * buffer is a boot mapped ring buffer.
	 */
	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
}

static void update_printk_trace(struct trace_array *tr)
{
	if (printk_trace == tr)
		return;

	printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
	printk_trace = tr;
	tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
}

void trace_set_ring_buffer_expanded(struct trace_array *tr)
{
	if (!tr)
		tr = &global_trace;
	tr->ring_buffer_expanded = true;
}

LIST_HEAD(ftrace_trace_arrays);

int trace_array_get(struct trace_array *this_tr)
{
	struct trace_array *tr;

	guard(mutex)(&trace_types_lock);
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr == this_tr) {
			tr->ref++;
			return 0;
		}
	}

	return -ENODEV;
}

static void __trace_array_put(struct trace_array *this_tr)
{
	WARN_ON(!this_tr->ref);
	this_tr->ref--;
}

/**
 * trace_array_put - Decrement the reference counter for this trace array.
 * @this_tr : pointer to the trace array
 *
 * NOTE: Use this when we no longer need the trace array returned by
 * trace_array_get_by_name(). This ensures the trace array can be later
 * destroyed.
 *
 */
void trace_array_put(struct trace_array *this_tr)
{
	if (!this_tr)
		return;

	mutex_lock(&trace_types_lock);
	__trace_array_put(this_tr);
	mutex_unlock(&trace_types_lock);
}
EXPORT_SYMBOL_GPL(trace_array_put);

int tracing_check_open_get_tr(struct trace_array *tr)
{
	int ret;

	ret = security_locked_down(LOCKDOWN_TRACEFS);
	if (ret)
		return ret;

	if (tracing_disabled)
		return -ENODEV;

	if (tr && trace_array_get(tr) < 0)
		return -ENODEV;

	return 0;
}

int call_filter_check_discard(struct trace_event_call *call, void *rec,
			      struct trace_buffer *buffer,
			      struct ring_buffer_event *event)
{
	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
	    !filter_match_preds(call->filter, rec)) {
		__trace_event_discard_commit(buffer, event);
		return 1;
	}

	return 0;
}

/**
 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
 * @filtered_pids: The list of pids to check
 * @search_pid: The PID to find in @filtered_pids
 *
 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
 */
bool
trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
{
	return trace_pid_list_is_set(filtered_pids, search_pid);
}

/**
 * trace_ignore_this_task - should a task be ignored for tracing
 * @filtered_pids: The list of pids to check
 * @filtered_no_pids: The list of pids not to be traced
 * @task: The task that should be ignored if not filtered
 *
 * Checks if @task should be traced or not from @filtered_pids.
 * Returns true if @task should *NOT* be traced.
 * Returns false if @task should be traced.
 */
bool
trace_ignore_this_task(struct trace_pid_list *filtered_pids,
		       struct trace_pid_list *filtered_no_pids,
		       struct task_struct *task)
{
	/*
	 * If filtered_no_pids is not empty, and the task's pid is listed
	 * in filtered_no_pids, then return true.
	 * Otherwise, if filtered_pids is empty, that means we can
	 * trace all tasks. If it has content, then only trace pids
	 * within filtered_pids.
	 */

	return (filtered_pids &&
		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
		(filtered_no_pids &&
		 trace_find_filtered_pid(filtered_no_pids, task->pid));
}

/**
 * trace_filter_add_remove_task - Add or remove a task from a pid_list
 * @pid_list: The list to modify
 * @self: The current task for fork or NULL for exit
 * @task: The task to add or remove
 *
 * If adding a task, if @self is defined, the task is only added if @self
 * is also included in @pid_list. This happens on fork and tasks should
 * only be added when the parent is listed. If @self is NULL, then the
 * @task pid will be removed from the list, which would happen on exit
 * of a task.
 */
void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
				  struct task_struct *self,
				  struct task_struct *task)
{
	if (!pid_list)
		return;

	/* For forks, we only add if the forking task is listed */
	if (self) {
		if (!trace_find_filtered_pid(pid_list, self->pid))
			return;
	}

	/* "self" is set for forks, and NULL for exits */
	if (self)
		trace_pid_list_set(pid_list, task->pid);
	else
		trace_pid_list_clear(pid_list, task->pid);
}

/**
 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
 * @pid_list: The pid list to show
 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
 * @pos: The position of the file
 *
 * This is used by the seq_file "next" operation to iterate the pids
 * listed in a trace_pid_list structure.
 *
 * Returns the pid+1 as we want to display pid of zero, but NULL would
 * stop the iteration.
 */
void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
{
	long pid = (unsigned long)v;
	unsigned int next;

	(*pos)++;

	/* pid already is +1 of the actual previous bit */
	if (trace_pid_list_next(pid_list, pid, &next) < 0)
		return NULL;

	pid = next;

	/* Return pid + 1 to allow zero to be represented */
	return (void *)(pid + 1);
}

/**
 * trace_pid_start - Used for seq_file to start reading pid lists
 * @pid_list: The pid list to show
 * @pos: The position of the file
 *
 * This is used by seq_file "start" operation to start the iteration
 * of listing pids.
 *
 * Returns the pid+1 as we want to display pid of zero, but NULL would
 * stop the iteration.
 */
void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
{
	unsigned long pid;
	unsigned int first;
	loff_t l = 0;

	if (trace_pid_list_first(pid_list, &first) < 0)
		return NULL;

	pid = first;

	/* Return pid + 1 so that zero can be the exit value */
	for (pid++; pid && l < *pos;
	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
		;
	return (void *)pid;
}

/**
 * trace_pid_show - show the current pid in seq_file processing
 * @m: The seq_file structure to write into
 * @v: A void pointer of the pid (+1) value to display
 *
 * Can be directly used by seq_file operations to display the current
 * pid value.
 */
int trace_pid_show(struct seq_file *m, void *v)
{
	unsigned long pid = (unsigned long)v - 1;

	seq_printf(m, "%lu\n", pid);
	return 0;
}
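
/*
 * Sketch of how the three helpers above are meant to be wired into a
 * seq_file for a pid-list tracefs file (names here are made up; the real
 * users live in the event and function pid filtering code):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,	// typically drops locks/RCU taken in p_start
 *		.show	= trace_pid_show,
 *	};
 */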

/* 128 should be much more than enough */
#define PID_BUF_SIZE 127

int trace_pid_write(struct trace_pid_list *filtered_pids,
		    struct trace_pid_list **new_pid_list,
		    const char __user *ubuf, size_t cnt)
{
	struct trace_pid_list *pid_list;
	struct trace_parser parser;
	unsigned long val;
	int nr_pids = 0;
	ssize_t read = 0;
	ssize_t ret;
	loff_t pos;
	pid_t pid;

	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
		return -ENOMEM;

	/*
	 * Always recreate a new array. The write is an all or nothing
	 * operation. Always create a new array when adding new pids by
	 * the user. If the operation fails, then the current list is
	 * not modified.
	 */
	pid_list = trace_pid_list_alloc();
	if (!pid_list) {
		trace_parser_put(&parser);
		return -ENOMEM;
	}

	if (filtered_pids) {
		/* copy the current bits to the new max */
		ret = trace_pid_list_first(filtered_pids, &pid);
		while (!ret) {
			ret = trace_pid_list_set(pid_list, pid);
			if (ret < 0)
				goto out;

			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
			nr_pids++;
		}
	}

	ret = 0;
	while (cnt > 0) {

		pos = 0;

		ret = trace_get_user(&parser, ubuf, cnt, &pos);
		if (ret < 0)
			break;

		read += ret;
		ubuf += ret;
		cnt -= ret;

		if (!trace_parser_loaded(&parser))
			break;

		ret = -EINVAL;
		if (kstrtoul(parser.buffer, 0, &val))
			break;

		pid = (pid_t)val;

		if (trace_pid_list_set(pid_list, pid) < 0) {
			ret = -1;
			break;
		}
		nr_pids++;

		trace_parser_clear(&parser);
		ret = 0;
	}
out:
	trace_parser_put(&parser);

	if (ret < 0) {
		trace_pid_list_free(pid_list);
		return ret;
	}

	if (!nr_pids) {
		/* Cleared the list of pids */
		trace_pid_list_free(pid_list);
		pid_list = NULL;
	}

	*new_pid_list = pid_list;

	return read;
}

static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
{
	u64 ts;

	/* Early boot up does not have a buffer yet */
	if (!buf->buffer)
		return trace_clock_local();

	ts = ring_buffer_time_stamp(buf->buffer);
	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);

	return ts;
}

u64 ftrace_now(int cpu)
{
	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
}

/**
 * tracing_is_enabled - Show if global_trace has been enabled
 *
 * Shows if the global trace has been enabled or not. It uses the
 * mirror flag "buffer_disabled" to be used in fast paths such as for
 * the irqsoff tracer. But it may be inaccurate due to races. If you
 * need to know the accurate state, use tracing_is_on() which is a little
 * slower, but accurate.
 */
int tracing_is_enabled(void)
{
	/*
	 * For quick access (irqsoff uses this in fast path), just
	 * return the mirror variable of the state of the ring buffer.
	 * It's a little racy, but we don't really care.
	 */
	smp_rmb();
	return !global_trace.buffer_disabled;
}

/*
 * trace_buf_size is the size in bytes that is allocated
 * for a buffer. Note, the number of bytes is always rounded
 * to page size.
 *
 * This number is purposely set to a low number of 16384.
 * If the dump on oops happens, it will be much appreciated
 * to not have to wait for all that output. Anyway this can be
 * boot time and run time configurable.
 */
#define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */

static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;

/* trace_types holds a link list of available tracers. */
static struct tracer *trace_types __read_mostly;

/*
 * trace_types_lock is used to protect the trace_types list.
 */
DEFINE_MUTEX(trace_types_lock);

/*
 * serialize the access of the ring buffer
 *
 * The ring buffer serializes readers, but that is only low-level
 * protection. The validity of the events (which are returned by
 * ring_buffer_peek(), etc.) is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow other processes
 * to consume these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *	(not a reader page) in the ring buffer, and this page will be
 *	rewritten by the events producer.
 *   B) the page of the consumed events may become a page for splice_read,
 *	and this page will be returned to the system.
 *
 * These primitives allow multiple processes to access different cpu
 * ring buffers concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multiple read-only accesses are also serialized.
 */

#ifdef CONFIG_SMP
static DECLARE_RWSEM(all_cpu_access_lock);
static DEFINE_PER_CPU(struct mutex, cpu_access_lock);

static inline void trace_access_lock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		/* gain it for accessing the whole ring buffer. */
		down_write(&all_cpu_access_lock);
	} else {
		/* gain it for accessing a cpu ring buffer. */

		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
		down_read(&all_cpu_access_lock);

		/* Secondly block other access to this @cpu ring buffer. */
		mutex_lock(&per_cpu(cpu_access_lock, cpu));
	}
}

static inline void trace_access_unlock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		up_write(&all_cpu_access_lock);
	} else {
		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
		up_read(&all_cpu_access_lock);
	}
}

static inline void trace_access_lock_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		mutex_init(&per_cpu(cpu_access_lock, cpu));
}

#else

static DEFINE_MUTEX(access_lock);

static inline void trace_access_lock(int cpu)
{
	(void)cpu;
	mutex_lock(&access_lock);
}

static inline void trace_access_unlock(int cpu)
{
	(void)cpu;
	mutex_unlock(&access_lock);
}

static inline void trace_access_lock_init(void)
{
}

#endif
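
/*
 * Typical caller pattern (sketch): a reader that consumes events from one
 * cpu buffer brackets the consumption with the helpers above, e.g.:
 *
 *	trace_access_lock(cpu);
 *	... peek at / consume events from the per-cpu ring buffer ...
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS instead takes the lock exclusively across
 * all cpu buffers.
 */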

#ifdef CONFIG_STACKTRACE
static void __ftrace_trace_stack(struct trace_array *tr,
				 struct trace_buffer *buffer,
				 unsigned int trace_ctx,
				 int skip, struct pt_regs *regs);
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct trace_buffer *buffer,
				      unsigned int trace_ctx,
				      int skip, struct pt_regs *regs);

#else
static inline void __ftrace_trace_stack(struct trace_array *tr,
					struct trace_buffer *buffer,
					unsigned int trace_ctx,
					int skip, struct pt_regs *regs)
{
}
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct trace_buffer *buffer,
				      unsigned long trace_ctx,
				      int skip, struct pt_regs *regs)
{
}

#endif

static __always_inline void
trace_event_setup(struct ring_buffer_event *event,
		  int type, unsigned int trace_ctx)
{
	struct trace_entry *ent = ring_buffer_event_data(event);

	tracing_generic_entry_update(ent, type, trace_ctx);
}

static __always_inline struct ring_buffer_event *
__trace_buffer_lock_reserve(struct trace_buffer *buffer,
			    int type,
			    unsigned long len,
			    unsigned int trace_ctx)
{
	struct ring_buffer_event *event;

	event = ring_buffer_lock_reserve(buffer, len);
	if (event != NULL)
		trace_event_setup(event, type, trace_ctx);

	return event;
}

void tracer_tracing_on(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_on(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff) that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races where it gets disabled while we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 0;
	/* Make the flag seen by readers */
	smp_wmb();
}

/**
 * tracing_on - enable tracing buffers
 *
 * This function enables tracing buffers that may have been
 * disabled with tracing_off.
 */
void tracing_on(void)
{
	tracer_tracing_on(&global_trace);
}


static __always_inline void
__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
{
	__this_cpu_write(trace_taskinfo_save, true);

	/* If this is the temp buffer, we need to commit fully */
	if (this_cpu_read(trace_buffered_event) == event) {
		/* Length is in event->array[0] */
		ring_buffer_write(buffer, event->array[0], &event->array[1]);
		/* Release the temp buffer */
		this_cpu_dec(trace_buffered_event_cnt);
		/* ring_buffer_unlock_commit() enables preemption */
		preempt_enable_notrace();
	} else
		ring_buffer_unlock_commit(buffer);
}

int __trace_array_puts(struct trace_array *tr, unsigned long ip,
		       const char *str, int size)
{
	struct ring_buffer_event *event;
	struct trace_buffer *buffer;
	struct print_entry *entry;
	unsigned int trace_ctx;
	int alloc;

	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
		return 0;

	if (unlikely(tracing_selftest_running && tr == &global_trace))
		return 0;

	if (unlikely(tracing_disabled))
		return 0;

	alloc = sizeof(*entry) + size + 2; /* possible \n added */

	trace_ctx = tracing_gen_ctx();
	buffer = tr->array_buffer.buffer;
	ring_buffer_nest_start(buffer);
	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
					    trace_ctx);
	if (!event) {
		size = 0;
		goto out;
	}

	entry = ring_buffer_event_data(event);
	entry->ip = ip;

	memcpy(&entry->buf, str, size);

	/* Add a newline if necessary */
	if (entry->buf[size - 1] != '\n') {
		entry->buf[size] = '\n';
		entry->buf[size + 1] = '\0';
	} else
		entry->buf[size] = '\0';

	__buffer_unlock_commit(buffer, event);
	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
out:
	ring_buffer_nest_end(buffer);
	return size;
}
EXPORT_SYMBOL_GPL(__trace_array_puts);

/**
 * __trace_puts - write a constant string into the trace buffer.
 * @ip: The address of the caller
 * @str: The constant string to write
 * @size: The size of the string.
 */
int __trace_puts(unsigned long ip, const char *str, int size)
{
	return __trace_array_puts(printk_trace, ip, str, size);
}
EXPORT_SYMBOL_GPL(__trace_puts);
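
/*
 * Note: callers normally reach this through the trace_puts() macro, which
 * fills in _THIS_IP_ and the string length for them, e.g. (illustrative):
 *
 *	trace_puts("reached the retry path\n");
 */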

/**
 * __trace_bputs - write the pointer to a constant string into trace buffer
 * @ip: The address of the caller
 * @str: The constant string to write to the buffer to
 */
int __trace_bputs(unsigned long ip, const char *str)
{
	struct trace_array *tr = READ_ONCE(printk_trace);
	struct ring_buffer_event *event;
	struct trace_buffer *buffer;
	struct bputs_entry *entry;
	unsigned int trace_ctx;
	int size = sizeof(struct bputs_entry);
	int ret = 0;

	if (!printk_binsafe(tr))
		return __trace_puts(ip, str, strlen(str));

	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
		return 0;

	if (unlikely(tracing_selftest_running || tracing_disabled))
		return 0;

	trace_ctx = tracing_gen_ctx();
	buffer = tr->array_buffer.buffer;

	ring_buffer_nest_start(buffer);
	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
					    trace_ctx);
	if (!event)
		goto out;

	entry = ring_buffer_event_data(event);
	entry->ip = ip;
	entry->str = str;

	__buffer_unlock_commit(buffer, event);
	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);

	ret = 1;
out:
	ring_buffer_nest_end(buffer);
	return ret;
}
EXPORT_SYMBOL_GPL(__trace_bputs);

#ifdef CONFIG_TRACER_SNAPSHOT
static void tracing_snapshot_instance_cond(struct trace_array *tr,
					   void *cond_data)
{
	struct tracer *tracer = tr->current_trace;
	unsigned long flags;

	if (in_nmi()) {
		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
		trace_array_puts(tr, "*** snapshot is being ignored ***\n");
		return;
	}

	if (!tr->allocated_snapshot) {
		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
		trace_array_puts(tr, "*** stopping trace here! ***\n");
		tracer_tracing_off(tr);
		return;
	}

	/* Note, snapshot can not be used when the tracer uses it */
	if (tracer->use_max_tr) {
		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
		return;
	}

	if (tr->mapped) {
		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
		return;
	}

	local_irq_save(flags);
	update_max_tr(tr, current, smp_processor_id(), cond_data);
	local_irq_restore(flags);
}

void tracing_snapshot_instance(struct trace_array *tr)
{
	tracing_snapshot_instance_cond(tr, NULL);
}

/**
 * tracing_snapshot - take a snapshot of the current buffer.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 *
 * Note, make sure to allocate the snapshot with either
 * a tracing_snapshot_alloc(), or by doing it manually
 * with: echo 1 > /sys/kernel/tracing/snapshot
 *
 * If the snapshot buffer is not allocated, it will stop tracing.
 * Basically making a permanent snapshot.
 */
void tracing_snapshot(void)
{
	struct trace_array *tr = &global_trace;

	tracing_snapshot_instance(tr);
}
EXPORT_SYMBOL_GPL(tracing_snapshot);

/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr: The tracing instance to snapshot
 * @cond_data: The data to be tested conditionally, and possibly saved
 *
 * This is the same as tracing_snapshot() except that the snapshot is
 * conditional - the snapshot will only happen if the
 * cond_snapshot.update() implementation receiving the cond_data
 * returns true, which means that the trace array's cond_snapshot
 * update() operation used the cond_data to determine whether the
 * snapshot should be taken, and if it was, presumably saved it along
 * with the snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);

/**
 * tracing_cond_snapshot_data - get the user data associated with a snapshot
 * @tr: The tracing instance
 *
 * When the user enables a conditional snapshot using
 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
 * with the snapshot. This accessor is used to retrieve it.
 *
 * Should not be called from cond_snapshot.update(), since it takes
 * the tr->max_lock lock, which the code calling
 * cond_snapshot.update() has already done.
 *
 * Returns the cond_data associated with the trace array's snapshot.
 */
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
	void *cond_data = NULL;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);

	if (tr->cond_snapshot)
		cond_data = tr->cond_snapshot->cond_data;

	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	return cond_data;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);

static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
					struct array_buffer *size_buf, int cpu_id);
static void set_buffer_entries(struct array_buffer *buf, unsigned long val);

int tracing_alloc_snapshot_instance(struct trace_array *tr)
{
	int order;
	int ret;

	if (!tr->allocated_snapshot) {

		/* Make the snapshot buffer have the same order as main buffer */
		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
		if (ret < 0)
			return ret;

		/* allocate spare buffer */
		ret = resize_buffer_duplicate_size(&tr->max_buffer,
				&tr->array_buffer, RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			return ret;

		tr->allocated_snapshot = true;
	}

	return 0;
}

static void free_snapshot(struct trace_array *tr)
{
	/*
	 * We don't free the ring buffer; instead, we resize it because
	 * the max_tr ring buffer has some state (e.g. ring->clock) that
	 * we want to preserve.
	 */
	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
	set_buffer_entries(&tr->max_buffer, 1);
	tracing_reset_online_cpus(&tr->max_buffer);
	tr->allocated_snapshot = false;
}

static int tracing_arm_snapshot_locked(struct trace_array *tr)
{
	int ret;

	lockdep_assert_held(&trace_types_lock);

	spin_lock(&tr->snapshot_trigger_lock);
	if (tr->snapshot == UINT_MAX || tr->mapped) {
		spin_unlock(&tr->snapshot_trigger_lock);
		return -EBUSY;
	}

	tr->snapshot++;
	spin_unlock(&tr->snapshot_trigger_lock);

	ret = tracing_alloc_snapshot_instance(tr);
	if (ret) {
		spin_lock(&tr->snapshot_trigger_lock);
		tr->snapshot--;
		spin_unlock(&tr->snapshot_trigger_lock);
	}

	return ret;
}

int tracing_arm_snapshot(struct trace_array *tr)
{
	int ret;

	mutex_lock(&trace_types_lock);
	ret = tracing_arm_snapshot_locked(tr);
	mutex_unlock(&trace_types_lock);

	return ret;
}

void tracing_disarm_snapshot(struct trace_array *tr)
{
	spin_lock(&tr->snapshot_trigger_lock);
	if (!WARN_ON(!tr->snapshot))
		tr->snapshot--;
	spin_unlock(&tr->snapshot_trigger_lock);
}

/**
 * tracing_alloc_snapshot - allocate snapshot buffer.
 *
 * This only allocates the snapshot buffer if it isn't already
 * allocated - it doesn't also take a snapshot.
 *
 * This is meant to be used in cases where the snapshot buffer needs
 * to be set up for events that can't sleep but need to be able to
 * trigger a snapshot.
 */
int tracing_alloc_snapshot(void)
{
	struct trace_array *tr = &global_trace;
	int ret;

	ret = tracing_alloc_snapshot_instance(tr);
	WARN_ON(ret < 0);

	return ret;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);

/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * This is similar to tracing_snapshot(), but it will allocate the
 * snapshot buffer if it isn't already allocated. Use this only
 * where it is safe to sleep, as the allocation may sleep.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 */
void tracing_snapshot_alloc(void)
{
	int ret;

	ret = tracing_alloc_snapshot();
	if (ret < 0)
		return;

	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);

/**
 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
 * @tr: The tracing instance
 * @cond_data: User data to associate with the snapshot
 * @update: Implementation of the cond_snapshot update function
 *
 * Check whether the conditional snapshot for the given instance has
 * already been enabled, or if the current tracer is already using a
 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
 * save the cond_data and update function inside.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
				 cond_update_fn_t update)
{
	struct cond_snapshot *cond_snapshot __free(kfree) =
		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
	int ret;

	if (!cond_snapshot)
		return -ENOMEM;

	cond_snapshot->cond_data = cond_data;
	cond_snapshot->update = update;

	guard(mutex)(&trace_types_lock);

	if (tr->current_trace->use_max_tr)
		return -EBUSY;

	/*
	 * The cond_snapshot can only change to NULL without the
	 * trace_types_lock. We don't care if we race with it going
	 * to NULL, but we want to make sure that it's not set to
	 * something other than NULL when we get here, which we can
	 * do safely with only holding the trace_types_lock and not
	 * having to take the max_lock.
	 */
	if (tr->cond_snapshot)
		return -EBUSY;

	ret = tracing_arm_snapshot_locked(tr);
	if (ret)
		return ret;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	tr->cond_snapshot = no_free_ptr(cond_snapshot);
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	return 0;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
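
/*
 * Usage sketch (illustrative, with made-up names): a subsystem that only
 * wants a snapshot when its own condition holds could do roughly:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *s = cond_data;
 *
 *		return s->hit_threshold;	// true => take the snapshot
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_state, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_state);	// snapshots only if my_update() agrees
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */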

/**
 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
 * @tr: The tracing instance
 *
 * Check whether the conditional snapshot for the given instance is
 * enabled; if so, free the cond_snapshot associated with it,
 * otherwise return -EINVAL.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	int ret = 0;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);

	if (!tr->cond_snapshot)
		ret = -EINVAL;
	else {
		kfree(tr->cond_snapshot);
		tr->cond_snapshot = NULL;
	}

	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	tracing_disarm_snapshot(tr);

	return ret;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
#else
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
int tracing_alloc_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
void tracing_snapshot_alloc(void)
{
	/* Give warning */
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
	return NULL;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
{
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	return false;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
#define free_snapshot(tr) do { } while (0)
#define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
#endif /* CONFIG_TRACER_SNAPSHOT */

void tracer_tracing_off(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_off(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff) that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races where it gets disabled while we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 1;
	/* Make the flag seen by readers */
	smp_wmb();
}

/**
 * tracing_off - turn off tracing buffers
 *
 * This function stops the tracing buffers from recording data.
 * It does not disable any overhead the tracers themselves may
 * be causing. This function simply causes all recording to
 * the ring buffers to fail.
 */
void tracing_off(void)
{
	tracer_tracing_off(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_off);
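
/*
 * Typical debugging use (sketch): call tracing_off() at the point where a
 * problem is detected so the ring buffer keeps the events leading up to it,
 * e.g. (with 'broken_state' being a made-up condition):
 *
 *	if (WARN_ON(broken_state))
 *		tracing_off();
 */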
1605
disable_trace_on_warning(void)1606 void disable_trace_on_warning(void)
1607 {
1608 if (__disable_trace_on_warning) {
1609 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1610 "Disabling tracing due to warning\n");
1611 tracing_off();
1612 }
1613 }
1614
1615 /**
1616 * tracer_tracing_is_on - show real state of ring buffer enabled
1617 * @tr : the trace array to know if ring buffer is enabled
1618 *
1619 * Shows real state of the ring buffer if it is enabled or not.
1620 */
tracer_tracing_is_on(struct trace_array * tr)1621 bool tracer_tracing_is_on(struct trace_array *tr)
1622 {
1623 if (tr->array_buffer.buffer)
1624 return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1625 return !tr->buffer_disabled;
1626 }
1627
1628 /**
1629 * tracing_is_on - show state of ring buffers enabled
1630 */
tracing_is_on(void)1631 int tracing_is_on(void)
1632 {
1633 return tracer_tracing_is_on(&global_trace);
1634 }
1635
set_buf_size(char * str)1636 static int __init set_buf_size(char *str)
1637 {
1638 unsigned long buf_size;
1639
1640 if (!str)
1641 return 0;
1642 buf_size = memparse(str, &str);
1643 /*
1644 * nr_entries can not be zero and the startup
1645 * tests require some buffer space. Therefore
1646 * ensure we have at least 4096 bytes of buffer.
1647 */
1648 trace_buf_size = max(4096UL, buf_size);
1649 return 1;
1650 }
1651 __setup("trace_buf_size=", set_buf_size);
1652
set_tracing_thresh(char * str)1653 static int __init set_tracing_thresh(char *str)
1654 {
1655 unsigned long threshold;
1656 int ret;
1657
1658 if (!str)
1659 return 0;
1660 ret = kstrtoul(str, 0, &threshold);
1661 if (ret < 0)
1662 return 0;
1663 tracing_thresh = threshold * 1000;
1664 return 1;
1665 }
1666 __setup("tracing_thresh=", set_tracing_thresh);
1667
nsecs_to_usecs(unsigned long nsecs)1668 unsigned long nsecs_to_usecs(unsigned long nsecs)
1669 {
1670 return nsecs / 1000;
1671 }
1672
1673 /*
1674 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1675 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1676 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1677 * of strings in the order that the evals (enum) were defined.
1678 */
1679 #undef C
1680 #define C(a, b) b
1681
1682 /* These must match the bit positions in trace_iterator_flags */
1683 static const char *trace_options[] = {
1684 TRACE_FLAGS
1685 NULL
1686 };
1687
1688 static struct {
1689 u64 (*func)(void);
1690 const char *name;
1691 int in_ns; /* is this clock in nanoseconds? */
1692 } trace_clocks[] = {
1693 { trace_clock_local, "local", 1 },
1694 { trace_clock_global, "global", 1 },
1695 { trace_clock_counter, "counter", 0 },
1696 { trace_clock_jiffies, "uptime", 0 },
1697 { trace_clock, "perf", 1 },
1698 { ktime_get_mono_fast_ns, "mono", 1 },
1699 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1700 { ktime_get_boot_fast_ns, "boot", 1 },
1701 { ktime_get_tai_fast_ns, "tai", 1 },
1702 ARCH_TRACE_CLOCKS
1703 };
1704
trace_clock_in_ns(struct trace_array * tr)1705 bool trace_clock_in_ns(struct trace_array *tr)
1706 {
1707 if (trace_clocks[tr->clock_id].in_ns)
1708 return true;
1709
1710 return false;
1711 }
1712
1713 /*
1714 * trace_parser_get_init - gets the buffer for trace parser
1715 */
trace_parser_get_init(struct trace_parser * parser,int size)1716 int trace_parser_get_init(struct trace_parser *parser, int size)
1717 {
1718 memset(parser, 0, sizeof(*parser));
1719
1720 parser->buffer = kmalloc(size, GFP_KERNEL);
1721 if (!parser->buffer)
1722 return 1;
1723
1724 parser->size = size;
1725 return 0;
1726 }
1727
1728 /*
1729 * trace_parser_put - frees the buffer for trace parser
1730 */
trace_parser_put(struct trace_parser * parser)1731 void trace_parser_put(struct trace_parser *parser)
1732 {
1733 kfree(parser->buffer);
1734 parser->buffer = NULL;
1735 }
1736
1737 /*
1738 * trace_get_user - reads the user input string separated by space
1739 * (matched by isspace(ch))
1740 *
1741 * For each string found the 'struct trace_parser' is updated,
1742 * and the function returns.
1743 *
1744 * Returns number of bytes read.
1745 *
1746 * See kernel/trace/trace.h for 'struct trace_parser' details.
1747 */
trace_get_user(struct trace_parser * parser,const char __user * ubuf,size_t cnt,loff_t * ppos)1748 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1749 size_t cnt, loff_t *ppos)
1750 {
1751 char ch;
1752 size_t read = 0;
1753 ssize_t ret;
1754
1755 if (!*ppos)
1756 trace_parser_clear(parser);
1757
1758 ret = get_user(ch, ubuf++);
1759 if (ret)
1760 goto fail;
1761
1762 read++;
1763 cnt--;
1764
1765 /*
1766 * The parser is not finished with the last write,
1767 * continue reading the user input without skipping spaces.
1768 */
1769 if (!parser->cont) {
1770 /* skip white space */
1771 while (cnt && isspace(ch)) {
1772 ret = get_user(ch, ubuf++);
1773 if (ret)
1774 goto fail;
1775 read++;
1776 cnt--;
1777 }
1778
1779 parser->idx = 0;
1780
1781 /* only spaces were written */
1782 if (isspace(ch) || !ch) {
1783 *ppos += read;
1784 return read;
1785 }
1786 }
1787
1788 /* read the non-space input */
1789 while (cnt && !isspace(ch) && ch) {
1790 if (parser->idx < parser->size - 1)
1791 parser->buffer[parser->idx++] = ch;
1792 else {
1793 ret = -EINVAL;
1794 goto fail;
1795 }
1796
1797 ret = get_user(ch, ubuf++);
1798 if (ret)
1799 goto fail;
1800 read++;
1801 cnt--;
1802 }
1803
1804 /* We either got finished input or we have to wait for another call. */
1805 if (isspace(ch) || !ch) {
1806 parser->buffer[parser->idx] = 0;
1807 parser->cont = false;
1808 } else if (parser->idx < parser->size - 1) {
1809 parser->cont = true;
1810 parser->buffer[parser->idx++] = ch;
1811 /* Make sure the parsed string always terminates with '\0'. */
1812 parser->buffer[parser->idx] = 0;
1813 } else {
1814 ret = -EINVAL;
1815 goto fail;
1816 }
1817
1818 *ppos += read;
1819 return read;
1820 fail:
1821 trace_parser_fail(parser);
1822 return ret;
1823 }
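/*
 * Usage sketch (hypothetical caller, not part of this file): a typical
 * ->write() handler feeds user input through the parser one token at a
 * time:
 *
 *	struct trace_parser parser;
 *	ssize_t read;
 *
 *	if (trace_parser_get_init(&parser, PAGE_SIZE))
 *		return -ENOMEM;
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read > 0 && trace_parser_loaded(&parser))
 *		handle_token(parser.buffer);	(hypothetical helper)
 *	trace_parser_put(&parser);
 *	return read;
 *
 * trace_parser_loaded() reports whether a complete token is buffered;
 * parser->cont stays set while a token spans multiple writes.
 */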
1824
1825 /* TODO add a seq_buf_to_buffer() */
1826 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1827 {
1828 int len;
1829
1830 if (trace_seq_used(s) <= s->readpos)
1831 return -EBUSY;
1832
1833 len = trace_seq_used(s) - s->readpos;
1834 if (cnt > len)
1835 cnt = len;
1836 memcpy(buf, s->buffer + s->readpos, cnt);
1837
1838 s->readpos += cnt;
1839 return cnt;
1840 }
1841
1842 unsigned long __read_mostly tracing_thresh;
1843
1844 #ifdef CONFIG_TRACER_MAX_TRACE
1845 static const struct file_operations tracing_max_lat_fops;
1846
1847 #ifdef LATENCY_FS_NOTIFY
1848
1849 static struct workqueue_struct *fsnotify_wq;
1850
1851 static void latency_fsnotify_workfn(struct work_struct *work)
1852 {
1853 struct trace_array *tr = container_of(work, struct trace_array,
1854 fsnotify_work);
1855 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1856 }
1857
1858 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1859 {
1860 struct trace_array *tr = container_of(iwork, struct trace_array,
1861 fsnotify_irqwork);
1862 queue_work(fsnotify_wq, &tr->fsnotify_work);
1863 }
1864
1865 static void trace_create_maxlat_file(struct trace_array *tr,
1866 struct dentry *d_tracer)
1867 {
1868 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1869 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1870 tr->d_max_latency = trace_create_file("tracing_max_latency",
1871 TRACE_MODE_WRITE,
1872 d_tracer, tr,
1873 &tracing_max_lat_fops);
1874 }
1875
1876 __init static int latency_fsnotify_init(void)
1877 {
1878 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1879 WQ_UNBOUND | WQ_HIGHPRI, 0);
1880 if (!fsnotify_wq) {
1881 pr_err("Unable to allocate tr_max_lat_wq\n");
1882 return -ENOMEM;
1883 }
1884 return 0;
1885 }
1886
1887 late_initcall_sync(latency_fsnotify_init);
1888
1889 void latency_fsnotify(struct trace_array *tr)
1890 {
1891 if (!fsnotify_wq)
1892 return;
1893 /*
1894 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1895 * possible that we are called from __schedule() or do_idle(), which
1896 * could cause a deadlock.
1897 */
1898 irq_work_queue(&tr->fsnotify_irqwork);
1899 }
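/*
 * Sketch of the notification chain above: latency_fsnotify() can run
 * from scheduler or idle context, so the fsnotify call is deferred in
 * two steps before it reaches ordinary process context:
 *
 *	latency_fsnotify()
 *	  -> irq_work_queue(&tr->fsnotify_irqwork)
 *	    -> latency_fsnotify_workfn_irq()		(hard irq)
 *	      -> queue_work(fsnotify_wq, &tr->fsnotify_work)
 *	        -> latency_fsnotify_workfn()		(workqueue)
 *	          -> fsnotify_inode() on tracing_max_latency
 */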
1900
1901 #else /* !LATENCY_FS_NOTIFY */
1902
1903 #define trace_create_maxlat_file(tr, d_tracer) \
1904 trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \
1905 d_tracer, tr, &tracing_max_lat_fops)
1906
1907 #endif
1908
1909 /*
1910 * Copy the new maximum trace into the separate maximum-trace
1911 * structure. (this way the maximum trace is permanently saved,
1912 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1913 */
1914 static void
1915 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1916 {
1917 struct array_buffer *trace_buf = &tr->array_buffer;
1918 struct array_buffer *max_buf = &tr->max_buffer;
1919 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1920 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1921
1922 max_buf->cpu = cpu;
1923 max_buf->time_start = data->preempt_timestamp;
1924
1925 max_data->saved_latency = tr->max_latency;
1926 max_data->critical_start = data->critical_start;
1927 max_data->critical_end = data->critical_end;
1928
1929 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1930 max_data->pid = tsk->pid;
1931 /*
1932 * If tsk == current, then use current_uid(), as that does not use
1933 * RCU. The irq tracer can be called out of RCU scope.
1934 */
1935 if (tsk == current)
1936 max_data->uid = current_uid();
1937 else
1938 max_data->uid = task_uid(tsk);
1939
1940 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1941 max_data->policy = tsk->policy;
1942 max_data->rt_priority = tsk->rt_priority;
1943
1944 /* record this task's comm */
1945 tracing_record_cmdline(tsk);
1946 latency_fsnotify(tr);
1947 }
1948
1949 /**
1950 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1951 * @tr: tracer
1952 * @tsk: the task with the latency
1953 * @cpu: The cpu that initiated the trace.
1954 * @cond_data: User data associated with a conditional snapshot
1955 *
1956 * Flip the buffers between the @tr and the max_tr and record information
1957 * about which task was the cause of this latency.
1958 */
1959 void
1960 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1961 void *cond_data)
1962 {
1963 if (tr->stop_count)
1964 return;
1965
1966 WARN_ON_ONCE(!irqs_disabled());
1967
1968 if (!tr->allocated_snapshot) {
1969 /* Only the nop tracer should hit this when disabling */
1970 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1971 return;
1972 }
1973
1974 arch_spin_lock(&tr->max_lock);
1975
1976 /* Inherit the recordable setting from array_buffer */
1977 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1978 ring_buffer_record_on(tr->max_buffer.buffer);
1979 else
1980 ring_buffer_record_off(tr->max_buffer.buffer);
1981
1982 #ifdef CONFIG_TRACER_SNAPSHOT
1983 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1984 arch_spin_unlock(&tr->max_lock);
1985 return;
1986 }
1987 #endif
1988 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1989
1990 __update_max_tr(tr, tsk, cpu);
1991
1992 arch_spin_unlock(&tr->max_lock);
1993
1994 /* Any waiters on the old snapshot buffer need to wake up */
1995 ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1996 }
1997
1998 /**
1999 * update_max_tr_single - only copy one trace over, and reset the rest
2000 * @tr: tracer
2001 * @tsk: task with the latency
2002 * @cpu: the cpu of the buffer to copy.
2003 *
2004 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2005 */
2006 void
2007 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2008 {
2009 int ret;
2010
2011 if (tr->stop_count)
2012 return;
2013
2014 WARN_ON_ONCE(!irqs_disabled());
2015 if (!tr->allocated_snapshot) {
2016 /* Only the nop tracer should hit this when disabling */
2017 WARN_ON_ONCE(tr->current_trace != &nop_trace);
2018 return;
2019 }
2020
2021 arch_spin_lock(&tr->max_lock);
2022
2023 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2024
2025 if (ret == -EBUSY) {
2026 /*
2027 * We failed to swap the buffer due to a commit taking
2028 * place on this CPU. We fail to record, but we reset
2029 * the max trace buffer (no one writes directly to it)
2030 * and flag that it failed.
2031 * A resize in progress can also cause this failure.
2032 */
2033 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2034 "Failed to swap buffers due to commit or resize in progress\n");
2035 }
2036
2037 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2038
2039 __update_max_tr(tr, tsk, cpu);
2040 arch_spin_unlock(&tr->max_lock);
2041 }
2042
2043 #endif /* CONFIG_TRACER_MAX_TRACE */
2044
2045 struct pipe_wait {
2046 struct trace_iterator *iter;
2047 int wait_index;
2048 };
2049
2050 static bool wait_pipe_cond(void *data)
2051 {
2052 struct pipe_wait *pwait = data;
2053 struct trace_iterator *iter = pwait->iter;
2054
2055 if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2056 return true;
2057
2058 return iter->closed;
2059 }
2060
2061 static int wait_on_pipe(struct trace_iterator *iter, int full)
2062 {
2063 struct pipe_wait pwait;
2064 int ret;
2065
2066 /* Iterators are static, they should be filled or empty */
2067 if (trace_buffer_iter(iter, iter->cpu_file))
2068 return 0;
2069
2070 pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2071 pwait.iter = iter;
2072
2073 ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2074 wait_pipe_cond, &pwait);
2075
2076 #ifdef CONFIG_TRACER_MAX_TRACE
2077 /*
2078 * Make sure this is still the snapshot buffer, as if a snapshot were
2079 * to happen, this would now be the main buffer.
2080 */
2081 if (iter->snapshot)
2082 iter->array_buffer = &iter->tr->max_buffer;
2083 #endif
2084 return ret;
2085 }
2086
2087 #ifdef CONFIG_FTRACE_STARTUP_TEST
2088 static bool selftests_can_run;
2089
2090 struct trace_selftests {
2091 struct list_head list;
2092 struct tracer *type;
2093 };
2094
2095 static LIST_HEAD(postponed_selftests);
2096
2097 static int save_selftest(struct tracer *type)
2098 {
2099 struct trace_selftests *selftest;
2100
2101 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2102 if (!selftest)
2103 return -ENOMEM;
2104
2105 selftest->type = type;
2106 list_add(&selftest->list, &postponed_selftests);
2107 return 0;
2108 }
2109
2110 static int run_tracer_selftest(struct tracer *type)
2111 {
2112 struct trace_array *tr = &global_trace;
2113 struct tracer *saved_tracer = tr->current_trace;
2114 int ret;
2115
2116 if (!type->selftest || tracing_selftest_disabled)
2117 return 0;
2118
2119 /*
2120 * If a tracer registers early in boot up (before scheduling is
2121 * initialized and such), then do not run its selftests yet.
2122 * Instead, run it a little later in the boot process.
2123 */
2124 if (!selftests_can_run)
2125 return save_selftest(type);
2126
2127 if (!tracing_is_on()) {
2128 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2129 type->name);
2130 return 0;
2131 }
2132
2133 /*
2134 * Run a selftest on this tracer.
2135 * Here we reset the trace buffer, and set the current
2136 * tracer to be this tracer. The tracer can then run some
2137 * internal tracing to verify that everything is in order.
2138 * If we fail, we do not register this tracer.
2139 */
2140 tracing_reset_online_cpus(&tr->array_buffer);
2141
2142 tr->current_trace = type;
2143
2144 #ifdef CONFIG_TRACER_MAX_TRACE
2145 if (type->use_max_tr) {
2146 /* If we expanded the buffers, make sure the max is expanded too */
2147 if (tr->ring_buffer_expanded)
2148 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2149 RING_BUFFER_ALL_CPUS);
2150 tr->allocated_snapshot = true;
2151 }
2152 #endif
2153
2154 /* the test is responsible for initializing and enabling */
2155 pr_info("Testing tracer %s: ", type->name);
2156 ret = type->selftest(type, tr);
2157 /* the test is responsible for resetting too */
2158 tr->current_trace = saved_tracer;
2159 if (ret) {
2160 printk(KERN_CONT "FAILED!\n");
2161 /* Add the warning after printing 'FAILED' */
2162 WARN_ON(1);
2163 return -1;
2164 }
2165 /* Only reset on passing, to avoid touching corrupted buffers */
2166 tracing_reset_online_cpus(&tr->array_buffer);
2167
2168 #ifdef CONFIG_TRACER_MAX_TRACE
2169 if (type->use_max_tr) {
2170 tr->allocated_snapshot = false;
2171
2172 /* Shrink the max buffer again */
2173 if (tr->ring_buffer_expanded)
2174 ring_buffer_resize(tr->max_buffer.buffer, 1,
2175 RING_BUFFER_ALL_CPUS);
2176 }
2177 #endif
2178
2179 printk(KERN_CONT "PASSED\n");
2180 return 0;
2181 }
2182
2183 static int do_run_tracer_selftest(struct tracer *type)
2184 {
2185 int ret;
2186
2187 /*
2188 * Tests can take a long time, especially if they are run one after the
2189 * other, as does happen during bootup when all the tracers are
2190 * registered. This could cause the soft lockup watchdog to trigger.
2191 */
2192 cond_resched();
2193
2194 tracing_selftest_running = true;
2195 ret = run_tracer_selftest(type);
2196 tracing_selftest_running = false;
2197
2198 return ret;
2199 }
2200
2201 static __init int init_trace_selftests(void)
2202 {
2203 struct trace_selftests *p, *n;
2204 struct tracer *t, **last;
2205 int ret;
2206
2207 selftests_can_run = true;
2208
2209 guard(mutex)(&trace_types_lock);
2210
2211 if (list_empty(&postponed_selftests))
2212 return 0;
2213
2214 pr_info("Running postponed tracer tests:\n");
2215
2216 tracing_selftest_running = true;
2217 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2218 /* This loop can take minutes when sanitizers are enabled, so
2219 * let's make sure we allow RCU processing.
2220 */
2221 cond_resched();
2222 ret = run_tracer_selftest(p->type);
2223 /* If the test fails, then warn and remove from available_tracers */
2224 if (ret < 0) {
2225 WARN(1, "tracer: %s failed selftest, disabling\n",
2226 p->type->name);
2227 last = &trace_types;
2228 for (t = trace_types; t; t = t->next) {
2229 if (t == p->type) {
2230 *last = t->next;
2231 break;
2232 }
2233 last = &t->next;
2234 }
2235 }
2236 list_del(&p->list);
2237 kfree(p);
2238 }
2239 tracing_selftest_running = false;
2240
2241 return 0;
2242 }
2243 core_initcall(init_trace_selftests);
2244 #else
2245 static inline int do_run_tracer_selftest(struct tracer *type)
2246 {
2247 return 0;
2248 }
2249 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2250
2251 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2252
2253 static void __init apply_trace_boot_options(void);
2254
2255 /**
2256 * register_tracer - register a tracer with the ftrace system.
2257 * @type: the plugin for the tracer
2258 *
2259 * Register a new plugin tracer.
2260 */
2261 int __init register_tracer(struct tracer *type)
2262 {
2263 struct tracer *t;
2264 int ret = 0;
2265
2266 if (!type->name) {
2267 pr_info("Tracer must have a name\n");
2268 return -1;
2269 }
2270
2271 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2272 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2273 return -1;
2274 }
2275
2276 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2277 pr_warn("Can not register tracer %s due to lockdown\n",
2278 type->name);
2279 return -EPERM;
2280 }
2281
2282 mutex_lock(&trace_types_lock);
2283
2284 for (t = trace_types; t; t = t->next) {
2285 if (strcmp(type->name, t->name) == 0) {
2286 /* already found */
2287 pr_info("Tracer %s already registered\n",
2288 type->name);
2289 ret = -1;
2290 goto out;
2291 }
2292 }
2293
2294 if (!type->set_flag)
2295 type->set_flag = &dummy_set_flag;
2296 if (!type->flags) {
2297 /* allocate a dummy tracer_flags */
2298 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2299 if (!type->flags) {
2300 ret = -ENOMEM;
2301 goto out;
2302 }
2303 type->flags->val = 0;
2304 type->flags->opts = dummy_tracer_opt;
2305 } else
2306 if (!type->flags->opts)
2307 type->flags->opts = dummy_tracer_opt;
2308
2309 /* store the tracer for __set_tracer_option */
2310 type->flags->trace = type;
2311
2312 ret = do_run_tracer_selftest(type);
2313 if (ret < 0)
2314 goto out;
2315
2316 type->next = trace_types;
2317 trace_types = type;
2318 add_tracer_options(&global_trace, type);
2319
2320 out:
2321 mutex_unlock(&trace_types_lock);
2322
2323 if (ret || !default_bootup_tracer)
2324 return ret;
2325
2326 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2327 return 0;
2328
2329 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2330 /* Do we want this tracer to start on bootup? */
2331 tracing_set_tracer(&global_trace, type->name);
2332 default_bootup_tracer = NULL;
2333
2334 apply_trace_boot_options();
2335
2336 /* Disable other selftests, since this will break them. */
2337 disable_tracing_selftest("running a tracer");
2338
2339 return 0;
2340 }
2341
2342 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2343 {
2344 struct trace_buffer *buffer = buf->buffer;
2345
2346 if (!buffer)
2347 return;
2348
2349 ring_buffer_record_disable(buffer);
2350
2351 /* Make sure all commits have finished */
2352 synchronize_rcu();
2353 ring_buffer_reset_cpu(buffer, cpu);
2354
2355 ring_buffer_record_enable(buffer);
2356 }
2357
2358 void tracing_reset_online_cpus(struct array_buffer *buf)
2359 {
2360 struct trace_buffer *buffer = buf->buffer;
2361
2362 if (!buffer)
2363 return;
2364
2365 ring_buffer_record_disable(buffer);
2366
2367 /* Make sure all commits have finished */
2368 synchronize_rcu();
2369
2370 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2371
2372 ring_buffer_reset_online_cpus(buffer);
2373
2374 ring_buffer_record_enable(buffer);
2375 }
2376
2377 static void tracing_reset_all_cpus(struct array_buffer *buf)
2378 {
2379 struct trace_buffer *buffer = buf->buffer;
2380
2381 if (!buffer)
2382 return;
2383
2384 ring_buffer_record_disable(buffer);
2385
2386 /* Make sure all commits have finished */
2387 synchronize_rcu();
2388
2389 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2390
2391 ring_buffer_reset(buffer);
2392
2393 ring_buffer_record_enable(buffer);
2394 }
2395
2396 /* Must have trace_types_lock held */
2397 void tracing_reset_all_online_cpus_unlocked(void)
2398 {
2399 struct trace_array *tr;
2400
2401 lockdep_assert_held(&trace_types_lock);
2402
2403 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2404 if (!tr->clear_trace)
2405 continue;
2406 tr->clear_trace = false;
2407 tracing_reset_online_cpus(&tr->array_buffer);
2408 #ifdef CONFIG_TRACER_MAX_TRACE
2409 tracing_reset_online_cpus(&tr->max_buffer);
2410 #endif
2411 }
2412 }
2413
2414 void tracing_reset_all_online_cpus(void)
2415 {
2416 mutex_lock(&trace_types_lock);
2417 tracing_reset_all_online_cpus_unlocked();
2418 mutex_unlock(&trace_types_lock);
2419 }
2420
2421 int is_tracing_stopped(void)
2422 {
2423 return global_trace.stop_count;
2424 }
2425
2426 static void tracing_start_tr(struct trace_array *tr)
2427 {
2428 struct trace_buffer *buffer;
2429 unsigned long flags;
2430
2431 if (tracing_disabled)
2432 return;
2433
2434 raw_spin_lock_irqsave(&tr->start_lock, flags);
2435 if (--tr->stop_count) {
2436 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2437 /* Someone screwed up their debugging */
2438 tr->stop_count = 0;
2439 }
2440 goto out;
2441 }
2442
2443 /* Prevent the buffers from switching */
2444 arch_spin_lock(&tr->max_lock);
2445
2446 buffer = tr->array_buffer.buffer;
2447 if (buffer)
2448 ring_buffer_record_enable(buffer);
2449
2450 #ifdef CONFIG_TRACER_MAX_TRACE
2451 buffer = tr->max_buffer.buffer;
2452 if (buffer)
2453 ring_buffer_record_enable(buffer);
2454 #endif
2455
2456 arch_spin_unlock(&tr->max_lock);
2457
2458 out:
2459 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2460 }
2461
2462 /**
2463 * tracing_start - quick start of the tracer
2464 *
2465 * If tracing is enabled but was stopped by tracing_stop,
2466 * this will start the tracer back up.
2467 */
2468 void tracing_start(void)
2469
2470 {
2471 return tracing_start_tr(&global_trace);
2472 }
2473
2474 static void tracing_stop_tr(struct trace_array *tr)
2475 {
2476 struct trace_buffer *buffer;
2477 unsigned long flags;
2478
2479 raw_spin_lock_irqsave(&tr->start_lock, flags);
2480 if (tr->stop_count++)
2481 goto out;
2482
2483 /* Prevent the buffers from switching */
2484 arch_spin_lock(&tr->max_lock);
2485
2486 buffer = tr->array_buffer.buffer;
2487 if (buffer)
2488 ring_buffer_record_disable(buffer);
2489
2490 #ifdef CONFIG_TRACER_MAX_TRACE
2491 buffer = tr->max_buffer.buffer;
2492 if (buffer)
2493 ring_buffer_record_disable(buffer);
2494 #endif
2495
2496 arch_spin_unlock(&tr->max_lock);
2497
2498 out:
2499 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2500 }
2501
2502 /**
2503 * tracing_stop - quick stop of the tracer
2504 *
2505 * Lightweight way to stop tracing. Use in conjunction with
2506 * tracing_start.
2507 */
2508 void tracing_stop(void)
2509 {
2510 return tracing_stop_tr(&global_trace);
2511 }
2512
2513 /*
2514 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2515 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2516 * simplifies those functions and keeps them in sync.
2517 */
2518 enum print_line_t trace_handle_return(struct trace_seq *s)
2519 {
2520 return trace_seq_has_overflowed(s) ?
2521 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2522 }
2523 EXPORT_SYMBOL_GPL(trace_handle_return);
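/*
 * Usage sketch (hypothetical event output callback): print handlers in
 * trace_output.c typically end with
 *
 *	trace_seq_printf(&iter->seq, "field=%d\n", field);
 *	return trace_handle_return(&iter->seq);
 *
 * so an overflowed seq buffer is reported uniformly as
 * TRACE_TYPE_PARTIAL_LINE.
 */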
2524
2525 static unsigned short migration_disable_value(void)
2526 {
2527 #if defined(CONFIG_SMP)
2528 return current->migration_disabled;
2529 #else
2530 return 0;
2531 #endif
2532 }
2533
2534 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2535 {
2536 unsigned int trace_flags = irqs_status;
2537 unsigned int pc;
2538
2539 pc = preempt_count();
2540
2541 if (pc & NMI_MASK)
2542 trace_flags |= TRACE_FLAG_NMI;
2543 if (pc & HARDIRQ_MASK)
2544 trace_flags |= TRACE_FLAG_HARDIRQ;
2545 if (in_serving_softirq())
2546 trace_flags |= TRACE_FLAG_SOFTIRQ;
2547 if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2548 trace_flags |= TRACE_FLAG_BH_OFF;
2549
2550 if (tif_need_resched())
2551 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2552 if (test_preempt_need_resched())
2553 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2554 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2555 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2556 }
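/*
 * Layout sketch of the value returned above (based on the current
 * encoding): bits 0-3 hold the preempt count (clamped to 15), bits 4-7
 * the migrate-disable depth (clamped to 15), and bits 16 and up the
 * TRACE_FLAG_* bits. A consumer could unpack it roughly as:
 *
 *	unsigned int ctx = tracing_gen_ctx_irq_test(irqs_status);
 *	unsigned int preempt_depth = ctx & 0xf;
 *	unsigned int migrate_depth = (ctx >> 4) & 0xf;
 *	unsigned int flags = ctx >> 16;
 */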
2557
2558 struct ring_buffer_event *
2559 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2560 int type,
2561 unsigned long len,
2562 unsigned int trace_ctx)
2563 {
2564 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2565 }
2566
2567 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2568 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2569 static int trace_buffered_event_ref;
2570
2571 /**
2572 * trace_buffered_event_enable - enable buffering events
2573 *
2574 * When events are being filtered, it is quicker to use a temporary
2575 * buffer to write the event data into if there's a likely chance
2576 * that it will not be committed. The discard of the ring buffer
2577 * is not as fast as committing, and is much slower than copying
2578 * a commit.
2579 *
2580 * When an event is to be filtered, allocate per cpu buffers to
2581 * write the event data into, and if the event is filtered and discarded
2582 * it is simply dropped; otherwise, the entire data is committed
2583 * in one shot.
2584 */
2585 void trace_buffered_event_enable(void)
2586 {
2587 struct ring_buffer_event *event;
2588 struct page *page;
2589 int cpu;
2590
2591 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2592
2593 if (trace_buffered_event_ref++)
2594 return;
2595
2596 for_each_tracing_cpu(cpu) {
2597 page = alloc_pages_node(cpu_to_node(cpu),
2598 GFP_KERNEL | __GFP_NORETRY, 0);
2599 /* This is just an optimization and can handle failures */
2600 if (!page) {
2601 pr_err("Failed to allocate event buffer\n");
2602 break;
2603 }
2604
2605 event = page_address(page);
2606 memset(event, 0, sizeof(*event));
2607
2608 per_cpu(trace_buffered_event, cpu) = event;
2609
2610 preempt_disable();
2611 if (cpu == smp_processor_id() &&
2612 __this_cpu_read(trace_buffered_event) !=
2613 per_cpu(trace_buffered_event, cpu))
2614 WARN_ON_ONCE(1);
2615 preempt_enable();
2616 }
2617 }
2618
2619 static void enable_trace_buffered_event(void *data)
2620 {
2621 /* Probably not needed, but do it anyway */
2622 smp_rmb();
2623 this_cpu_dec(trace_buffered_event_cnt);
2624 }
2625
2626 static void disable_trace_buffered_event(void *data)
2627 {
2628 this_cpu_inc(trace_buffered_event_cnt);
2629 }
2630
2631 /**
2632 * trace_buffered_event_disable - disable buffering events
2633 *
2634 * When a filter is removed, it is faster to not use the buffered
2635 * events, and to commit directly into the ring buffer. Free up
2636 * the temp buffers when there are no more users. This requires
2637 * special synchronization with current events.
2638 */
2639 void trace_buffered_event_disable(void)
2640 {
2641 int cpu;
2642
2643 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2644
2645 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2646 return;
2647
2648 if (--trace_buffered_event_ref)
2649 return;
2650
2651 /* For each CPU, set the buffer as used. */
2652 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2653 NULL, true);
2654
2655 /* Wait for all current users to finish */
2656 synchronize_rcu();
2657
2658 for_each_tracing_cpu(cpu) {
2659 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2660 per_cpu(trace_buffered_event, cpu) = NULL;
2661 }
2662
2663 /*
2664 * Wait for all CPUs that may have started checking whether they can
2665 * use their event buffer only after the previous synchronize_rcu()
2666 * call and that still read a valid pointer from trace_buffered_event.
2667 * They must not see the cleared trace_buffered_event_cnt, or they
2668 * could wrongly decide to use the pointed-to buffer, which is now freed.
2669 */
2670 synchronize_rcu();
2671
2672 /* For each CPU, relinquish the buffer */
2673 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2674 true);
2675 }
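/*
 * Usage sketch (hypothetical, mirroring how the event filter code pairs
 * these calls under event_mutex):
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	... attach the filter; events now stage into the per-CPU page ...
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 *
 * Both functions are reference counted, so nested enable/disable pairs
 * from multiple filters are fine.
 */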
2676
2677 static struct trace_buffer *temp_buffer;
2678
2679 struct ring_buffer_event *
2680 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2681 struct trace_event_file *trace_file,
2682 int type, unsigned long len,
2683 unsigned int trace_ctx)
2684 {
2685 struct ring_buffer_event *entry;
2686 struct trace_array *tr = trace_file->tr;
2687 int val;
2688
2689 *current_rb = tr->array_buffer.buffer;
2690
2691 if (!tr->no_filter_buffering_ref &&
2692 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2693 preempt_disable_notrace();
2694 /*
2695 * Filtering is on, so try to use the per cpu buffer first.
2696 * This buffer will simulate a ring_buffer_event,
2697 * where the type_len is zero and the array[0] will
2698 * hold the full length.
2699 * (see include/linux/ring_buffer.h for details on
2700 * how the ring_buffer_event is structured).
2701 *
2702 * Using a temp buffer during filtering and copying it
2703 * on a matched filter is quicker than writing directly
2704 * into the ring buffer and then discarding it when
2705 * it doesn't match. That is because the discard
2706 * requires several atomic operations to get right.
2707 * Copying on match and doing nothing on a failed match
2708 * is still quicker than no copy on match, but having
2709 * to discard out of the ring buffer on a failed match.
2710 */
2711 if ((entry = __this_cpu_read(trace_buffered_event))) {
2712 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2713
2714 val = this_cpu_inc_return(trace_buffered_event_cnt);
2715
2716 /*
2717 * Preemption is disabled, but interrupts and NMIs
2718 * can still come in now. If that happens after
2719 * the above increment, then it will have to go
2720 * back to the old method of allocating the event
2721 * on the ring buffer, and if the filter fails, it
2722 * will have to call ring_buffer_discard_commit()
2723 * to remove it.
2724 *
2725 * Need to also check the unlikely case that the
2726 * length is bigger than the temp buffer size.
2727 * If that happens, then the reserve is pretty much
2728 * guaranteed to fail, as the ring buffer currently
2729 * only allows events less than a page. But that may
2730 * change in the future, so let the ring buffer reserve
2731 * handle the failure in that case.
2732 */
2733 if (val == 1 && likely(len <= max_len)) {
2734 trace_event_setup(entry, type, trace_ctx);
2735 entry->array[0] = len;
2736 /* Return with preemption disabled */
2737 return entry;
2738 }
2739 this_cpu_dec(trace_buffered_event_cnt);
2740 }
2741 /* __trace_buffer_lock_reserve() disables preemption */
2742 preempt_enable_notrace();
2743 }
2744
2745 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2746 trace_ctx);
2747 /*
2748 * If tracing is off, but we have triggers enabled
2749 * we still need to look at the event data. Use the temp_buffer
2750 * to store the trace event for the trigger to use. It's recursive
2751 * safe and will not be recorded anywhere.
2752 */
2753 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2754 *current_rb = temp_buffer;
2755 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2756 trace_ctx);
2757 }
2758 return entry;
2759 }
2760 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
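/*
 * Sketch of the scratch event used above (an assumption based on the
 * current ring_buffer_event layout): the per-CPU page is zeroed when
 * allocated, so type_len stays 0, ring_buffer_event_data() returns
 * &entry->array[1], and array[0] carries the payload length:
 *
 *	entry->array[0] = len;		payload follows array[0]
 *	max payload = PAGE_SIZE - struct_size(entry, array, 1)
 *
 * That is why a plain page can stand in for a real reservation while
 * filtering, as long as only one non-nested context uses it at a time
 * (the trace_buffered_event_cnt == 1 check above).
 */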
2761
2762 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2763 static DEFINE_MUTEX(tracepoint_printk_mutex);
2764
2765 static void output_printk(struct trace_event_buffer *fbuffer)
2766 {
2767 struct trace_event_call *event_call;
2768 struct trace_event_file *file;
2769 struct trace_event *event;
2770 unsigned long flags;
2771 struct trace_iterator *iter = tracepoint_print_iter;
2772
2773 /* We should never get here if iter is NULL */
2774 if (WARN_ON_ONCE(!iter))
2775 return;
2776
2777 event_call = fbuffer->trace_file->event_call;
2778 if (!event_call || !event_call->event.funcs ||
2779 !event_call->event.funcs->trace)
2780 return;
2781
2782 file = fbuffer->trace_file;
2783 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2784 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2785 !filter_match_preds(file->filter, fbuffer->entry)))
2786 return;
2787
2788 event = &fbuffer->trace_file->event_call->event;
2789
2790 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2791 trace_seq_init(&iter->seq);
2792 iter->ent = fbuffer->entry;
2793 event_call->event.funcs->trace(iter, 0, event);
2794 trace_seq_putc(&iter->seq, 0);
2795 printk("%s", iter->seq.buffer);
2796
2797 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2798 }
2799
2800 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2801 void *buffer, size_t *lenp,
2802 loff_t *ppos)
2803 {
2804 int save_tracepoint_printk;
2805 int ret;
2806
2807 guard(mutex)(&tracepoint_printk_mutex);
2808 save_tracepoint_printk = tracepoint_printk;
2809
2810 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2811
2812 /*
2813 * This will force exiting early, as tracepoint_printk
2814 * is always zero when tracepoint_print_iter is not allocated.
2815 */
2816 if (!tracepoint_print_iter)
2817 tracepoint_printk = 0;
2818
2819 if (save_tracepoint_printk == tracepoint_printk)
2820 return ret;
2821
2822 if (tracepoint_printk)
2823 static_key_enable(&tracepoint_printk_key.key);
2824 else
2825 static_key_disable(&tracepoint_printk_key.key);
2826
2827 return ret;
2828 }
2829
2830 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2831 {
2832 enum event_trigger_type tt = ETT_NONE;
2833 struct trace_event_file *file = fbuffer->trace_file;
2834
2835 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2836 fbuffer->entry, &tt))
2837 goto discard;
2838
2839 if (static_key_false(&tracepoint_printk_key.key))
2840 output_printk(fbuffer);
2841
2842 if (static_branch_unlikely(&trace_event_exports_enabled))
2843 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2844
2845 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2846 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2847
2848 discard:
2849 if (tt)
2850 event_triggers_post_call(file, tt);
2851
2852 }
2853 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2854
2855 /*
2856 * Skip 3:
2857 *
2858 * trace_buffer_unlock_commit_regs()
2859 * trace_event_buffer_commit()
2860 * trace_event_raw_event_xxx()
2861 */
2862 # define STACK_SKIP 3
2863
2864 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2865 struct trace_buffer *buffer,
2866 struct ring_buffer_event *event,
2867 unsigned int trace_ctx,
2868 struct pt_regs *regs)
2869 {
2870 __buffer_unlock_commit(buffer, event);
2871
2872 /*
2873 * If regs is not set, then skip the necessary functions.
2874 * Note, we can still get here via blktrace, wakeup tracer
2875 * and mmiotrace, but that's ok if they lose a function or
2876 * two. They are not that meaningful.
2877 */
2878 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2879 ftrace_trace_userstack(tr, buffer, trace_ctx);
2880 }
2881
2882 /*
2883 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2884 */
2885 void
2886 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2887 struct ring_buffer_event *event)
2888 {
2889 __buffer_unlock_commit(buffer, event);
2890 }
2891
2892 void
2893 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2894 parent_ip, unsigned int trace_ctx)
2895 {
2896 struct trace_event_call *call = &event_function;
2897 struct trace_buffer *buffer = tr->array_buffer.buffer;
2898 struct ring_buffer_event *event;
2899 struct ftrace_entry *entry;
2900
2901 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2902 trace_ctx);
2903 if (!event)
2904 return;
2905 entry = ring_buffer_event_data(event);
2906 entry->ip = ip;
2907 entry->parent_ip = parent_ip;
2908
2909 if (!call_filter_check_discard(call, entry, buffer, event)) {
2910 if (static_branch_unlikely(&trace_function_exports_enabled))
2911 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2912 __buffer_unlock_commit(buffer, event);
2913 }
2914 }
2915
2916 #ifdef CONFIG_STACKTRACE
2917
2918 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2919 #define FTRACE_KSTACK_NESTING 4
2920
2921 #define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2922
2923 struct ftrace_stack {
2924 unsigned long calls[FTRACE_KSTACK_ENTRIES];
2925 };
2926
2927
2928 struct ftrace_stacks {
2929 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
2930 };
2931
2932 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2933 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
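/*
 * Sketch of how the nesting slots above are used: each context level on
 * a CPU (task, softirq, hardirq, NMI) that enters __ftrace_trace_stack()
 * claims its own struct ftrace_stack via the per-CPU reserve counter:
 *
 *	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
 *	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
 *
 * so an NMI interrupting an IRQ that interrupted a task never scribbles
 * over a partially filled stack buffer.
 */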
2934
2935 static void __ftrace_trace_stack(struct trace_array *tr,
2936 struct trace_buffer *buffer,
2937 unsigned int trace_ctx,
2938 int skip, struct pt_regs *regs)
2939 {
2940 struct trace_event_call *call = &event_kernel_stack;
2941 struct ring_buffer_event *event;
2942 unsigned int size, nr_entries;
2943 struct ftrace_stack *fstack;
2944 struct stack_entry *entry;
2945 int stackidx;
2946
2947 /*
2948 * Add one, for this function and the call to stack_trace_save().
2949 * If regs is set, then these functions will not be in the way.
2950 */
2951 #ifndef CONFIG_UNWINDER_ORC
2952 if (!regs)
2953 skip++;
2954 #endif
2955
2956 preempt_disable_notrace();
2957
2958 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2959
2960 /* This should never happen. If it does, yell once and skip */
2961 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2962 goto out;
2963
2964 /*
2965 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2966 * interrupt will either see the value pre increment or post
2967 * increment. If the interrupt happens pre increment it will have
2968 * restored the counter when it returns. We just need a barrier to
2969 * keep gcc from moving things around.
2970 */
2971 barrier();
2972
2973 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2974 size = ARRAY_SIZE(fstack->calls);
2975
2976 if (regs) {
2977 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2978 size, skip);
2979 } else {
2980 nr_entries = stack_trace_save(fstack->calls, size, skip);
2981 }
2982
2983 #ifdef CONFIG_DYNAMIC_FTRACE
2984 /* Mark entry of stack trace as trampoline code */
2985 if (tr->ops && tr->ops->trampoline) {
2986 unsigned long tramp_start = tr->ops->trampoline;
2987 unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
2988 unsigned long *calls = fstack->calls;
2989
2990 for (int i = 0; i < nr_entries; i++) {
2991 if (calls[i] >= tramp_start && calls[i] < tramp_end)
2992 calls[i] = FTRACE_TRAMPOLINE_MARKER;
2993 }
2994 }
2995 #endif
2996
2997 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2998 struct_size(entry, caller, nr_entries),
2999 trace_ctx);
3000 if (!event)
3001 goto out;
3002 entry = ring_buffer_event_data(event);
3003
3004 entry->size = nr_entries;
3005 memcpy(&entry->caller, fstack->calls,
3006 flex_array_size(entry, caller, nr_entries));
3007
3008 if (!call_filter_check_discard(call, entry, buffer, event))
3009 __buffer_unlock_commit(buffer, event);
3010
3011 out:
3012 /* Again, don't let gcc optimize things here */
3013 barrier();
3014 __this_cpu_dec(ftrace_stack_reserve);
3015 preempt_enable_notrace();
3016
3017 }
3018
3019 static inline void ftrace_trace_stack(struct trace_array *tr,
3020 struct trace_buffer *buffer,
3021 unsigned int trace_ctx,
3022 int skip, struct pt_regs *regs)
3023 {
3024 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3025 return;
3026
3027 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3028 }
3029
3030 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3031 int skip)
3032 {
3033 struct trace_buffer *buffer = tr->array_buffer.buffer;
3034
3035 if (rcu_is_watching()) {
3036 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3037 return;
3038 }
3039
3040 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3041 return;
3042
3043 /*
3044 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3045 * but if the above rcu_is_watching() failed, then the NMI
3046 * triggered someplace critical, and ct_irq_enter() should
3047 * not be called from NMI.
3048 */
3049 if (unlikely(in_nmi()))
3050 return;
3051
3052 ct_irq_enter_irqson();
3053 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3054 ct_irq_exit_irqson();
3055 }
3056
3057 /**
3058 * trace_dump_stack - record a stack back trace in the trace buffer
3059 * @skip: Number of functions to skip (helper handlers)
3060 */
3061 void trace_dump_stack(int skip)
3062 {
3063 if (tracing_disabled || tracing_selftest_running)
3064 return;
3065
3066 #ifndef CONFIG_UNWINDER_ORC
3067 /* Skip 1 to skip this function. */
3068 skip++;
3069 #endif
3070 __ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3071 tracing_gen_ctx(), skip, NULL);
3072 }
3073 EXPORT_SYMBOL_GPL(trace_dump_stack);
3074
3075 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3076 static DEFINE_PER_CPU(int, user_stack_count);
3077
3078 static void
3079 ftrace_trace_userstack(struct trace_array *tr,
3080 struct trace_buffer *buffer, unsigned int trace_ctx)
3081 {
3082 struct trace_event_call *call = &event_user_stack;
3083 struct ring_buffer_event *event;
3084 struct userstack_entry *entry;
3085
3086 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3087 return;
3088
3089 /*
3090 * NMIs cannot handle page faults, even with fixups.
3091 * The save user stack can (and often does) fault.
3092 */
3093 if (unlikely(in_nmi()))
3094 return;
3095
3096 /*
3097 * prevent recursion, since the user stack tracing may
3098 * trigger other kernel events.
3099 */
3100 preempt_disable();
3101 if (__this_cpu_read(user_stack_count))
3102 goto out;
3103
3104 __this_cpu_inc(user_stack_count);
3105
3106 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3107 sizeof(*entry), trace_ctx);
3108 if (!event)
3109 goto out_drop_count;
3110 entry = ring_buffer_event_data(event);
3111
3112 entry->tgid = current->tgid;
3113 memset(&entry->caller, 0, sizeof(entry->caller));
3114
3115 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3116 if (!call_filter_check_discard(call, entry, buffer, event))
3117 __buffer_unlock_commit(buffer, event);
3118
3119 out_drop_count:
3120 __this_cpu_dec(user_stack_count);
3121 out:
3122 preempt_enable();
3123 }
3124 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3125 static void ftrace_trace_userstack(struct trace_array *tr,
3126 struct trace_buffer *buffer,
3127 unsigned int trace_ctx)
3128 {
3129 }
3130 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3131
3132 #endif /* CONFIG_STACKTRACE */
3133
3134 static inline void
3135 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3136 unsigned long long delta)
3137 {
3138 entry->bottom_delta_ts = delta & U32_MAX;
3139 entry->top_delta_ts = (delta >> 32);
3140 }
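/*
 * Sketch of the split above: the 64-bit delta is stored as two 32-bit
 * halves in the entry, so a reader reconstructs it as
 *
 *	delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 *
 * (see the func_repeats output code in trace_output.c).
 */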
3141
3142 void trace_last_func_repeats(struct trace_array *tr,
3143 struct trace_func_repeats *last_info,
3144 unsigned int trace_ctx)
3145 {
3146 struct trace_buffer *buffer = tr->array_buffer.buffer;
3147 struct func_repeats_entry *entry;
3148 struct ring_buffer_event *event;
3149 u64 delta;
3150
3151 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3152 sizeof(*entry), trace_ctx);
3153 if (!event)
3154 return;
3155
3156 delta = ring_buffer_event_time_stamp(buffer, event) -
3157 last_info->ts_last_call;
3158
3159 entry = ring_buffer_event_data(event);
3160 entry->ip = last_info->ip;
3161 entry->parent_ip = last_info->parent_ip;
3162 entry->count = last_info->count;
3163 func_repeats_set_delta_ts(entry, delta);
3164
3165 __buffer_unlock_commit(buffer, event);
3166 }
3167
3168 /* created for use with alloc_percpu */
3169 struct trace_buffer_struct {
3170 int nesting;
3171 char buffer[4][TRACE_BUF_SIZE];
3172 };
3173
3174 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3175
3176 /*
3177 * This allows for lockless recording. If we're nested too deeply, then
3178 * this returns NULL.
3179 */
3180 static char *get_trace_buf(void)
3181 {
3182 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3183
3184 if (!trace_percpu_buffer || buffer->nesting >= 4)
3185 return NULL;
3186
3187 buffer->nesting++;
3188
3189 /* Interrupts must see nesting incremented before we use the buffer */
3190 barrier();
3191 return &buffer->buffer[buffer->nesting - 1][0];
3192 }
3193
3194 static void put_trace_buf(void)
3195 {
3196 /* Don't let the decrement of nesting leak before this */
3197 barrier();
3198 this_cpu_dec(trace_percpu_buffer->nesting);
3199 }
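/*
 * Usage sketch for the per-CPU scratch buffers above (this is the
 * pattern trace_vbprintk() and __trace_array_vprintk() follow below):
 * preemption must stay disabled between get and put so the nesting
 * counter pairs up on the same CPU.
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		len = vscnprintf(buf, TRACE_BUF_SIZE, fmt, args);
 *		... copy buf into a ring buffer event ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */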
3200
3201 static int alloc_percpu_trace_buffer(void)
3202 {
3203 struct trace_buffer_struct __percpu *buffers;
3204
3205 if (trace_percpu_buffer)
3206 return 0;
3207
3208 buffers = alloc_percpu(struct trace_buffer_struct);
3209 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3210 return -ENOMEM;
3211
3212 trace_percpu_buffer = buffers;
3213 return 0;
3214 }
3215
3216 static int buffers_allocated;
3217
3218 void trace_printk_init_buffers(void)
3219 {
3220 if (buffers_allocated)
3221 return;
3222
3223 if (alloc_percpu_trace_buffer())
3224 return;
3225
3226 /* trace_printk() is for debug use only. Don't use it in production. */
3227
3228 pr_warn("\n");
3229 pr_warn("**********************************************************\n");
3230 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3231 pr_warn("** **\n");
3232 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3233 pr_warn("** **\n");
3234 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3235 pr_warn("** unsafe for production use. **\n");
3236 pr_warn("** **\n");
3237 pr_warn("** If you see this message and you are not debugging **\n");
3238 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3239 pr_warn("** **\n");
3240 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3241 pr_warn("**********************************************************\n");
3242
3243 /* Expand the buffers to set size */
3244 tracing_update_buffers(&global_trace);
3245
3246 buffers_allocated = 1;
3247
3248 /*
3249 * trace_printk_init_buffers() can be called by modules.
3250 * If that happens, then we need to start cmdline recording
3251 * directly here. If the global_trace.buffer is already
3252 * allocated here, then this was called by module code.
3253 */
3254 if (global_trace.array_buffer.buffer)
3255 tracing_start_cmdline_record();
3256 }
3257 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3258
3259 void trace_printk_start_comm(void)
3260 {
3261 /* Start tracing comms if trace printk is set */
3262 if (!buffers_allocated)
3263 return;
3264 tracing_start_cmdline_record();
3265 }
3266
3267 static void trace_printk_start_stop_comm(int enabled)
3268 {
3269 if (!buffers_allocated)
3270 return;
3271
3272 if (enabled)
3273 tracing_start_cmdline_record();
3274 else
3275 tracing_stop_cmdline_record();
3276 }
3277
3278 /**
3279 * trace_vbprintk - write binary msg to tracing buffer
3280 * @ip: The address of the caller
3281 * @fmt: The string format to write to the buffer
3282 * @args: Arguments for @fmt
3283 */
3284 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3285 {
3286 struct trace_event_call *call = &event_bprint;
3287 struct ring_buffer_event *event;
3288 struct trace_buffer *buffer;
3289 struct trace_array *tr = READ_ONCE(printk_trace);
3290 struct bprint_entry *entry;
3291 unsigned int trace_ctx;
3292 char *tbuffer;
3293 int len = 0, size;
3294
3295 if (!printk_binsafe(tr))
3296 return trace_vprintk(ip, fmt, args);
3297
3298 if (unlikely(tracing_selftest_running || tracing_disabled))
3299 return 0;
3300
3301 /* Don't pollute graph traces with trace_vprintk internals */
3302 pause_graph_tracing();
3303
3304 trace_ctx = tracing_gen_ctx();
3305 preempt_disable_notrace();
3306
3307 tbuffer = get_trace_buf();
3308 if (!tbuffer) {
3309 len = 0;
3310 goto out_nobuffer;
3311 }
3312
3313 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3314
3315 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3316 goto out_put;
3317
3318 size = sizeof(*entry) + sizeof(u32) * len;
3319 buffer = tr->array_buffer.buffer;
3320 ring_buffer_nest_start(buffer);
3321 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3322 trace_ctx);
3323 if (!event)
3324 goto out;
3325 entry = ring_buffer_event_data(event);
3326 entry->ip = ip;
3327 entry->fmt = fmt;
3328
3329 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3330 if (!call_filter_check_discard(call, entry, buffer, event)) {
3331 __buffer_unlock_commit(buffer, event);
3332 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3333 }
3334
3335 out:
3336 ring_buffer_nest_end(buffer);
3337 out_put:
3338 put_trace_buf();
3339
3340 out_nobuffer:
3341 preempt_enable_notrace();
3342 unpause_graph_tracing();
3343
3344 return len;
3345 }
3346 EXPORT_SYMBOL_GPL(trace_vbprintk);
3347
3348 static __printf(3, 0)
3349 int __trace_array_vprintk(struct trace_buffer *buffer,
3350 unsigned long ip, const char *fmt, va_list args)
3351 {
3352 struct trace_event_call *call = &event_print;
3353 struct ring_buffer_event *event;
3354 int len = 0, size;
3355 struct print_entry *entry;
3356 unsigned int trace_ctx;
3357 char *tbuffer;
3358
3359 if (tracing_disabled)
3360 return 0;
3361
3362 /* Don't pollute graph traces with trace_vprintk internals */
3363 pause_graph_tracing();
3364
3365 trace_ctx = tracing_gen_ctx();
3366 preempt_disable_notrace();
3367
3368
3369 tbuffer = get_trace_buf();
3370 if (!tbuffer) {
3371 len = 0;
3372 goto out_nobuffer;
3373 }
3374
3375 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3376
3377 size = sizeof(*entry) + len + 1;
3378 ring_buffer_nest_start(buffer);
3379 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3380 trace_ctx);
3381 if (!event)
3382 goto out;
3383 entry = ring_buffer_event_data(event);
3384 entry->ip = ip;
3385
3386 memcpy(&entry->buf, tbuffer, len + 1);
3387 if (!call_filter_check_discard(call, entry, buffer, event)) {
3388 __buffer_unlock_commit(buffer, event);
3389 ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3390 }
3391
3392 out:
3393 ring_buffer_nest_end(buffer);
3394 put_trace_buf();
3395
3396 out_nobuffer:
3397 preempt_enable_notrace();
3398 unpause_graph_tracing();
3399
3400 return len;
3401 }
3402
3403 int trace_array_vprintk(struct trace_array *tr,
3404 unsigned long ip, const char *fmt, va_list args)
3405 {
3406 if (tracing_selftest_running && tr == &global_trace)
3407 return 0;
3408
3409 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3410 }
3411
3412 /**
3413 * trace_array_printk - Print a message to a specific instance
3414 * @tr: The instance trace_array descriptor
3415 * @ip: The instruction pointer that this is called from.
3416 * @fmt: The format to print (printf format)
3417 *
3418 * If a subsystem sets up its own instance, they have the right to
3419 * printk strings into their tracing instance buffer using this
3420 * function. Note, this function will not write into the top level
3421 * buffer (use trace_printk() for that), as writing into the top level
3422 * buffer should only have events that can be individually disabled.
3423 * trace_printk() is only used for debugging a kernel, and should not
3424 * be ever incorporated in normal use.
3425 *
3426 * trace_array_printk() can be used, as it will not add noise to the
3427 * top level tracing buffer.
3428 *
3429 * Note, trace_array_init_printk() must be called on @tr before this
3430 * can be used.
3431 */
3432 int trace_array_printk(struct trace_array *tr,
3433 unsigned long ip, const char *fmt, ...)
3434 {
3435 int ret;
3436 va_list ap;
3437
3438 if (!tr)
3439 return -ENOENT;
3440
3441 /* This is only allowed for created instances */
3442 if (tr == &global_trace)
3443 return 0;
3444
3445 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3446 return 0;
3447
3448 va_start(ap, fmt);
3449 ret = trace_array_vprintk(tr, ip, fmt, ap);
3450 va_end(ap);
3451 return ret;
3452 }
3453 EXPORT_SYMBOL_GPL(trace_array_printk);
3454
3455 /**
3456 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3457 * @tr: The trace array to initialize the buffers for
3458 *
3459 * As trace_array_printk() only writes into instances, they are OK to
3460 * have in the kernel (unlike trace_printk()). This needs to be called
3461 * before trace_array_printk() can be used on a trace_array.
3462 */
3463 int trace_array_init_printk(struct trace_array *tr)
3464 {
3465 if (!tr)
3466 return -ENOENT;
3467
3468 /* This is only allowed for created instances */
3469 if (tr == &global_trace)
3470 return -EINVAL;
3471
3472 return alloc_percpu_trace_buffer();
3473 }
3474 EXPORT_SYMBOL_GPL(trace_array_init_printk);
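/*
 * Usage sketch (hypothetical caller): given a struct trace_array *tr for
 * a subsystem-created instance,
 *
 *	if (!trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "probe %d done\n", id);
 *
 * the message lands only in that instance's buffer, never in the
 * top-level trace buffer.
 */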
3475
3476 int trace_array_printk_buf(struct trace_buffer *buffer,
3477 unsigned long ip, const char *fmt, ...)
3478 {
3479 int ret;
3480 va_list ap;
3481
3482 if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3483 return 0;
3484
3485 va_start(ap, fmt);
3486 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3487 va_end(ap);
3488 return ret;
3489 }
3490
3491 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3492 {
3493 return trace_array_vprintk(printk_trace, ip, fmt, args);
3494 }
3495 EXPORT_SYMBOL_GPL(trace_vprintk);
3496
3497 static void trace_iterator_increment(struct trace_iterator *iter)
3498 {
3499 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3500
3501 iter->idx++;
3502 if (buf_iter)
3503 ring_buffer_iter_advance(buf_iter);
3504 }
3505
3506 static struct trace_entry *
3507 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3508 unsigned long *lost_events)
3509 {
3510 struct ring_buffer_event *event;
3511 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3512
3513 if (buf_iter) {
3514 event = ring_buffer_iter_peek(buf_iter, ts);
3515 if (lost_events)
3516 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3517 (unsigned long)-1 : 0;
3518 } else {
3519 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3520 lost_events);
3521 }
3522
3523 if (event) {
3524 iter->ent_size = ring_buffer_event_length(event);
3525 return ring_buffer_event_data(event);
3526 }
3527 iter->ent_size = 0;
3528 return NULL;
3529 }
3530
3531 static struct trace_entry *
3532 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3533 unsigned long *missing_events, u64 *ent_ts)
3534 {
3535 struct trace_buffer *buffer = iter->array_buffer->buffer;
3536 struct trace_entry *ent, *next = NULL;
3537 unsigned long lost_events = 0, next_lost = 0;
3538 int cpu_file = iter->cpu_file;
3539 u64 next_ts = 0, ts;
3540 int next_cpu = -1;
3541 int next_size = 0;
3542 int cpu;
3543
3544 /*
3545 * If we are in a per_cpu trace file, don't bother by iterating over
3546 * all cpu and peek directly.
3547 */
3548 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3549 if (ring_buffer_empty_cpu(buffer, cpu_file))
3550 return NULL;
3551 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3552 if (ent_cpu)
3553 *ent_cpu = cpu_file;
3554
3555 return ent;
3556 }
3557
3558 for_each_tracing_cpu(cpu) {
3559
3560 if (ring_buffer_empty_cpu(buffer, cpu))
3561 continue;
3562
3563 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3564
3565 /*
3566 * Pick the entry with the smallest timestamp:
3567 */
3568 if (ent && (!next || ts < next_ts)) {
3569 next = ent;
3570 next_cpu = cpu;
3571 next_ts = ts;
3572 next_lost = lost_events;
3573 next_size = iter->ent_size;
3574 }
3575 }
3576
3577 iter->ent_size = next_size;
3578
3579 if (ent_cpu)
3580 *ent_cpu = next_cpu;
3581
3582 if (ent_ts)
3583 *ent_ts = next_ts;
3584
3585 if (missing_events)
3586 *missing_events = next_lost;
3587
3588 return next;
3589 }
3590
3591 #define STATIC_FMT_BUF_SIZE 128
3592 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3593
3594 char *trace_iter_expand_format(struct trace_iterator *iter)
3595 {
3596 char *tmp;
3597
3598 /*
3599 * iter->tr is NULL when used with tp_printk, which makes
3600 * this get called where it is not safe to call krealloc().
3601 */
3602 if (!iter->tr || iter->fmt == static_fmt_buf)
3603 return NULL;
3604
3605 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3606 GFP_KERNEL);
3607 if (tmp) {
3608 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3609 iter->fmt = tmp;
3610 }
3611
3612 return tmp;
3613 }
3614
3615 /* Returns true if the string is safe to dereference from an event */
3616 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3617 {
3618 unsigned long addr = (unsigned long)str;
3619 struct trace_event *trace_event;
3620 struct trace_event_call *event;
3621
3622 /* OK if part of the event data */
3623 if ((addr >= (unsigned long)iter->ent) &&
3624 (addr < (unsigned long)iter->ent + iter->ent_size))
3625 return true;
3626
3627 /* OK if part of the temp seq buffer */
3628 if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3629 (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3630 return true;
3631
3632 /* Core rodata can not be freed */
3633 if (is_kernel_rodata(addr))
3634 return true;
3635
3636 if (trace_is_tracepoint_string(str))
3637 return true;
3638
3639 /*
3640 * Now this could be a module event, referencing core module
3641 * data, which is OK.
3642 */
3643 if (!iter->ent)
3644 return false;
3645
3646 trace_event = ftrace_find_event(iter->ent->type);
3647 if (!trace_event)
3648 return false;
3649
3650 event = container_of(trace_event, struct trace_event_call, event);
3651 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3652 return false;
3653
3654 /* Would rather have rodata, but this will suffice */
3655 if (within_module_core(addr, event->module))
3656 return true;
3657
3658 return false;
3659 }
3660
3661 /**
3662 * ignore_event - Check dereferenced fields while writing to the seq buffer
3663 * @iter: The iterator that holds the seq buffer and the event being printed
3664 *
3665 * At boot up, test_event_printk() will flag any event that dereferences
3666 * a string with "%s" that does not exist in the ring buffer. It may still
3667 * be valid, as the string may point to a static string in the kernel
3668 * rodata that never gets freed. But if the string pointer is pointing
3669 * to something that was allocated, there's a chance that it can be freed
3670 * by the time the user reads the trace. This would cause a bad memory
3671 * access by the kernel and possibly crash the system.
3672 *
3673 * This function will check if the event has any fields flagged as needing
3674 * to be checked at runtime and perform those checks.
3675 *
3676 * If it is found that a field is unsafe, it will write into the @iter->seq
3677 * a message stating what was found to be unsafe.
3678 *
3679 * @return: true if the event is unsafe and should be ignored,
3680 * false otherwise.
3681 */
3682 bool ignore_event(struct trace_iterator *iter)
3683 {
3684 struct ftrace_event_field *field;
3685 struct trace_event *trace_event;
3686 struct trace_event_call *event;
3687 struct list_head *head;
3688 struct trace_seq *seq;
3689 const void *ptr;
3690
3691 trace_event = ftrace_find_event(iter->ent->type);
3692
3693 seq = &iter->seq;
3694
3695 if (!trace_event) {
3696 trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3697 return true;
3698 }
3699
3700 event = container_of(trace_event, struct trace_event_call, event);
3701 if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3702 return false;
3703
3704 head = trace_get_fields(event);
3705 if (!head) {
3706 trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3707 trace_event_name(event));
3708 return true;
3709 }
3710
3711 /* Offsets are from the iter->ent that points to the raw event */
3712 ptr = iter->ent;
3713
3714 list_for_each_entry(field, head, link) {
3715 const char *str;
3716 bool good;
3717
3718 if (!field->needs_test)
3719 continue;
3720
3721 str = *(const char **)(ptr + field->offset);
3722
3723 good = trace_safe_str(iter, str);
3724
3725 /*
3726 * If you hit this warning, it is likely that the
3727 * trace event in question used %s on a string that
3728 * was saved at the time of the event, but may not be
3729 * around when the trace is read. Use __string(),
3730 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3731 * instead. See samples/trace_events/trace-events-sample.h
3732 * for reference.
3733 */
3734 if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3735 trace_event_name(event), field->name)) {
3736 trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3737 trace_event_name(event), field->name);
3738 return true;
3739 }
3740 }
3741 return false;
3742 }
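/*
 * For reference, the safe pattern referenced above copies the string
 * into the event itself. A minimal sketch (event and field names are
 * illustrative only):
 *
 *	TRACE_EVENT(sample_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 *
 * (Older kernels spell the assignment as __assign_str(name, name).)
 * See samples/trace_events/trace-events-sample.h for the full example.
 */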
3743
3744 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3745 {
3746 const char *p, *new_fmt;
3747 char *q;
3748
3749 if (WARN_ON_ONCE(!fmt))
3750 return fmt;
3751
3752 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3753 return fmt;
3754
3755 p = fmt;
3756 new_fmt = q = iter->fmt;
3757 while (*p) {
3758 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3759 if (!trace_iter_expand_format(iter))
3760 return fmt;
3761
3762 q += iter->fmt - new_fmt;
3763 new_fmt = iter->fmt;
3764 }
3765
3766 *q++ = *p++;
3767
3768 /* Replace %p with %px */
3769 if (p[-1] == '%') {
3770 if (p[0] == '%') {
3771 *q++ = *p++;
3772 } else if (p[0] == 'p' && !isalnum(p[1])) {
3773 *q++ = *p++;
3774 *q++ = 'x';
3775 }
3776 }
3777 }
3778 *q = '\0';
3779
3780 return new_fmt;
3781 }
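/*
 * Example (illustrative): with the "hash-ptr" option cleared, a format
 * such as "addr=%p len=%d" is rewritten above to "addr=%px len=%d" so
 * the raw address is printed, while "%%p" and extended specifiers like
 * "%pS" are left untouched.
 */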
3782
3783 #define STATIC_TEMP_BUF_SIZE 128
3784 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3785
3786 /* Find the next real entry, without updating the iterator itself */
3787 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3788 int *ent_cpu, u64 *ent_ts)
3789 {
3790 /* __find_next_entry will reset ent_size */
3791 int ent_size = iter->ent_size;
3792 struct trace_entry *entry;
3793
3794 /*
3795 * If called from ftrace_dump(), then the iter->temp buffer
3796 * will be the static_temp_buf and not created from kmalloc.
3797 * If the entry size is greater than the buffer, we can
3798 * not save it. Just return NULL in that case. This is only
3799 * used to add markers when two consecutive events' time
3800 * stamps have a large delta. See trace_print_lat_context()
3801 */
3802 if (iter->temp == static_temp_buf &&
3803 STATIC_TEMP_BUF_SIZE < ent_size)
3804 return NULL;
3805
3806 /*
3807 * The __find_next_entry() may call peek_next_entry(), which may
3808 * call ring_buffer_peek() that may make the contents of iter->ent
3809 * undefined. Need to copy iter->ent now.
3810 */
3811 if (iter->ent && iter->ent != iter->temp) {
3812 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3813 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3814 void *temp;
3815 temp = kmalloc(iter->ent_size, GFP_KERNEL);
3816 if (!temp)
3817 return NULL;
3818 kfree(iter->temp);
3819 iter->temp = temp;
3820 iter->temp_size = iter->ent_size;
3821 }
3822 memcpy(iter->temp, iter->ent, iter->ent_size);
3823 iter->ent = iter->temp;
3824 }
3825 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3826 /* Put back the original ent_size */
3827 iter->ent_size = ent_size;
3828
3829 return entry;
3830 }
3831
3832 /* Find the next real entry, and increment the iterator to the next entry */
3833 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3834 {
3835 iter->ent = __find_next_entry(iter, &iter->cpu,
3836 &iter->lost_events, &iter->ts);
3837
3838 if (iter->ent)
3839 trace_iterator_increment(iter);
3840
3841 return iter->ent ? iter : NULL;
3842 }
3843
3844 static void trace_consume(struct trace_iterator *iter)
3845 {
3846 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3847 &iter->lost_events);
3848 }
3849
3850 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3851 {
3852 struct trace_iterator *iter = m->private;
3853 int i = (int)*pos;
3854 void *ent;
3855
3856 WARN_ON_ONCE(iter->leftover);
3857
3858 (*pos)++;
3859
3860 /* can't go backwards */
3861 if (iter->idx > i)
3862 return NULL;
3863
3864 if (iter->idx < 0)
3865 ent = trace_find_next_entry_inc(iter);
3866 else
3867 ent = iter;
3868
3869 while (ent && iter->idx < i)
3870 ent = trace_find_next_entry_inc(iter);
3871
3872 iter->pos = *pos;
3873
3874 return ent;
3875 }
3876
3877 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3878 {
3879 struct ring_buffer_iter *buf_iter;
3880 unsigned long entries = 0;
3881 u64 ts;
3882
3883 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3884
3885 buf_iter = trace_buffer_iter(iter, cpu);
3886 if (!buf_iter)
3887 return;
3888
3889 ring_buffer_iter_reset(buf_iter);
3890
3891 /*
3892 * We could have the case with the max latency tracers
3893 * that a reset never took place on a cpu. This is evident
3894 * by the timestamp being before the start of the buffer.
3895 */
3896 while (ring_buffer_iter_peek(buf_iter, &ts)) {
3897 if (ts >= iter->array_buffer->time_start)
3898 break;
3899 entries++;
3900 ring_buffer_iter_advance(buf_iter);
3901 /* This could be a big loop */
3902 cond_resched();
3903 }
3904
3905 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3906 }
3907
3908 /*
3909 * The current tracer is copied to avoid global locking
3910 * all around.
3911 */
3912 static void *s_start(struct seq_file *m, loff_t *pos)
3913 {
3914 struct trace_iterator *iter = m->private;
3915 struct trace_array *tr = iter->tr;
3916 int cpu_file = iter->cpu_file;
3917 void *p = NULL;
3918 loff_t l = 0;
3919 int cpu;
3920
3921 mutex_lock(&trace_types_lock);
3922 if (unlikely(tr->current_trace != iter->trace)) {
3923 /* Close iter->trace before switching to the new current tracer */
3924 if (iter->trace->close)
3925 iter->trace->close(iter);
3926 iter->trace = tr->current_trace;
3927 /* Reopen the new current tracer */
3928 if (iter->trace->open)
3929 iter->trace->open(iter);
3930 }
3931 mutex_unlock(&trace_types_lock);
3932
3933 #ifdef CONFIG_TRACER_MAX_TRACE
3934 if (iter->snapshot && iter->trace->use_max_tr)
3935 return ERR_PTR(-EBUSY);
3936 #endif
3937
3938 if (*pos != iter->pos) {
3939 iter->ent = NULL;
3940 iter->cpu = 0;
3941 iter->idx = -1;
3942
3943 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3944 for_each_tracing_cpu(cpu)
3945 tracing_iter_reset(iter, cpu);
3946 } else
3947 tracing_iter_reset(iter, cpu_file);
3948
3949 iter->leftover = 0;
3950 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3951 ;
3952
3953 } else {
3954 /*
3955 * If we overflowed the seq_file before, then we want
3956 * to just reuse the trace_seq buffer again.
3957 */
3958 if (iter->leftover)
3959 p = iter;
3960 else {
3961 l = *pos - 1;
3962 p = s_next(m, p, &l);
3963 }
3964 }
3965
3966 trace_event_read_lock();
3967 trace_access_lock(cpu_file);
3968 return p;
3969 }
3970
3971 static void s_stop(struct seq_file *m, void *p)
3972 {
3973 struct trace_iterator *iter = m->private;
3974
3975 #ifdef CONFIG_TRACER_MAX_TRACE
3976 if (iter->snapshot && iter->trace->use_max_tr)
3977 return;
3978 #endif
3979
3980 trace_access_unlock(iter->cpu_file);
3981 trace_event_read_unlock();
3982 }
3983
3984 static void
3985 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3986 unsigned long *entries, int cpu)
3987 {
3988 unsigned long count;
3989
3990 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3991 /*
3992 * If this buffer has skipped entries, then we hold all
3993 * entries for the trace and we need to ignore the
3994 * ones before the time stamp.
3995 */
3996 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3997 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3998 /* total is the same as the entries */
3999 *total = count;
4000 } else
4001 *total = count +
4002 ring_buffer_overrun_cpu(buf->buffer, cpu);
4003 *entries = count;
4004 }
4005
4006 static void
4007 get_total_entries(struct array_buffer *buf,
4008 unsigned long *total, unsigned long *entries)
4009 {
4010 unsigned long t, e;
4011 int cpu;
4012
4013 *total = 0;
4014 *entries = 0;
4015
4016 for_each_tracing_cpu(cpu) {
4017 get_total_entries_cpu(buf, &t, &e, cpu);
4018 *total += t;
4019 *entries += e;
4020 }
4021 }
4022
4023 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4024 {
4025 unsigned long total, entries;
4026
4027 if (!tr)
4028 tr = &global_trace;
4029
4030 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4031
4032 return entries;
4033 }
4034
4035 unsigned long trace_total_entries(struct trace_array *tr)
4036 {
4037 unsigned long total, entries;
4038
4039 if (!tr)
4040 tr = &global_trace;
4041
4042 get_total_entries(&tr->array_buffer, &total, &entries);
4043
4044 return entries;
4045 }
4046
4047 static void print_lat_help_header(struct seq_file *m)
4048 {
4049 seq_puts(m, "# _------=> CPU# \n"
4050 "# / _-----=> irqs-off/BH-disabled\n"
4051 "# | / _----=> need-resched \n"
4052 "# || / _---=> hardirq/softirq \n"
4053 "# ||| / _--=> preempt-depth \n"
4054 "# |||| / _-=> migrate-disable \n"
4055 "# ||||| / delay \n"
4056 "# cmd pid |||||| time | caller \n"
4057 "# \\ / |||||| \\ | / \n");
4058 }
4059
4060 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4061 {
4062 unsigned long total;
4063 unsigned long entries;
4064
4065 get_total_entries(buf, &total, &entries);
4066 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
4067 entries, total, num_online_cpus());
4068 seq_puts(m, "#\n");
4069 }
4070
4071 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4072 unsigned int flags)
4073 {
4074 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4075
4076 print_event_info(buf, m);
4077
4078 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
4079 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
4080 }
4081
4082 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4083 unsigned int flags)
4084 {
4085 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4086 static const char space[] = " ";
4087 int prec = tgid ? 12 : 2;
4088
4089 print_event_info(buf, m);
4090
4091 seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space);
4092 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
4093 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
4094 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
4095 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
4096 seq_printf(m, "# %.*s|||| / delay\n", prec, space);
4097 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
4098 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
4099 }
4100
4101 void
4102 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4103 {
4104 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4105 struct array_buffer *buf = iter->array_buffer;
4106 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4107 struct tracer *type = iter->trace;
4108 unsigned long entries;
4109 unsigned long total;
4110 const char *name = type->name;
4111
4112 get_total_entries(buf, &total, &entries);
4113
4114 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4115 name, init_utsname()->release);
4116 seq_puts(m, "# -----------------------------------"
4117 "---------------------------------\n");
4118 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4119 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4120 nsecs_to_usecs(data->saved_latency),
4121 entries,
4122 total,
4123 buf->cpu,
4124 preempt_model_none() ? "server" :
4125 preempt_model_voluntary() ? "desktop" :
4126 preempt_model_full() ? "preempt" :
4127 preempt_model_rt() ? "preempt_rt" :
4128 "unknown",
4129 /* These are reserved for later use */
4130 0, 0, 0, 0);
4131 #ifdef CONFIG_SMP
4132 seq_printf(m, " #P:%d)\n", num_online_cpus());
4133 #else
4134 seq_puts(m, ")\n");
4135 #endif
4136 seq_puts(m, "# -----------------\n");
4137 seq_printf(m, "# | task: %.16s-%d "
4138 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4139 data->comm, data->pid,
4140 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4141 data->policy, data->rt_priority);
4142 seq_puts(m, "# -----------------\n");
4143
4144 if (data->critical_start) {
4145 seq_puts(m, "# => started at: ");
4146 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4147 trace_print_seq(m, &iter->seq);
4148 seq_puts(m, "\n# => ended at: ");
4149 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4150 trace_print_seq(m, &iter->seq);
4151 seq_puts(m, "\n#\n");
4152 }
4153
4154 seq_puts(m, "#\n");
4155 }
4156
4157 static void test_cpu_buff_start(struct trace_iterator *iter)
4158 {
4159 struct trace_seq *s = &iter->seq;
4160 struct trace_array *tr = iter->tr;
4161
4162 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4163 return;
4164
4165 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4166 return;
4167
4168 if (cpumask_available(iter->started) &&
4169 cpumask_test_cpu(iter->cpu, iter->started))
4170 return;
4171
4172 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4173 return;
4174
4175 if (cpumask_available(iter->started))
4176 cpumask_set_cpu(iter->cpu, iter->started);
4177
4178 /* Don't print started cpu buffer for the first entry of the trace */
4179 if (iter->idx > 1)
4180 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4181 iter->cpu);
4182 }
4183
4184 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4185 {
4186 struct trace_array *tr = iter->tr;
4187 struct trace_seq *s = &iter->seq;
4188 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4189 struct trace_entry *entry;
4190 struct trace_event *event;
4191
4192 entry = iter->ent;
4193
4194 test_cpu_buff_start(iter);
4195
4196 event = ftrace_find_event(entry->type);
4197
4198 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4199 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4200 trace_print_lat_context(iter);
4201 else
4202 trace_print_context(iter);
4203 }
4204
4205 if (trace_seq_has_overflowed(s))
4206 return TRACE_TYPE_PARTIAL_LINE;
4207
4208 if (event) {
4209 if (tr->trace_flags & TRACE_ITER_FIELDS)
4210 return print_event_fields(iter, event);
4211 /*
4212 * For TRACE_EVENT() events, the print_fmt is not
4213 * safe to use if the array has delta offsets.
4214 * Force printing via the fields.
4215 */
4216 if ((tr->text_delta || tr->data_delta) &&
4217 event->type > __TRACE_LAST_TYPE)
4218 return print_event_fields(iter, event);
4219
4220 return event->funcs->trace(iter, sym_flags, event);
4221 }
4222
4223 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4224
4225 return trace_handle_return(s);
4226 }
4227
4228 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4229 {
4230 struct trace_array *tr = iter->tr;
4231 struct trace_seq *s = &iter->seq;
4232 struct trace_entry *entry;
4233 struct trace_event *event;
4234
4235 entry = iter->ent;
4236
4237 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4238 trace_seq_printf(s, "%d %d %llu ",
4239 entry->pid, iter->cpu, iter->ts);
4240
4241 if (trace_seq_has_overflowed(s))
4242 return TRACE_TYPE_PARTIAL_LINE;
4243
4244 event = ftrace_find_event(entry->type);
4245 if (event)
4246 return event->funcs->raw(iter, 0, event);
4247
4248 trace_seq_printf(s, "%d ?\n", entry->type);
4249
4250 return trace_handle_return(s);
4251 }
4252
4253 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4254 {
4255 struct trace_array *tr = iter->tr;
4256 struct trace_seq *s = &iter->seq;
4257 unsigned char newline = '\n';
4258 struct trace_entry *entry;
4259 struct trace_event *event;
4260
4261 entry = iter->ent;
4262
4263 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4264 SEQ_PUT_HEX_FIELD(s, entry->pid);
4265 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4266 SEQ_PUT_HEX_FIELD(s, iter->ts);
4267 if (trace_seq_has_overflowed(s))
4268 return TRACE_TYPE_PARTIAL_LINE;
4269 }
4270
4271 event = ftrace_find_event(entry->type);
4272 if (event) {
4273 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4274 if (ret != TRACE_TYPE_HANDLED)
4275 return ret;
4276 }
4277
4278 SEQ_PUT_FIELD(s, newline);
4279
4280 return trace_handle_return(s);
4281 }
4282
4283 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4284 {
4285 struct trace_array *tr = iter->tr;
4286 struct trace_seq *s = &iter->seq;
4287 struct trace_entry *entry;
4288 struct trace_event *event;
4289
4290 entry = iter->ent;
4291
4292 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4293 SEQ_PUT_FIELD(s, entry->pid);
4294 SEQ_PUT_FIELD(s, iter->cpu);
4295 SEQ_PUT_FIELD(s, iter->ts);
4296 if (trace_seq_has_overflowed(s))
4297 return TRACE_TYPE_PARTIAL_LINE;
4298 }
4299
4300 event = ftrace_find_event(entry->type);
4301 return event ? event->funcs->binary(iter, 0, event) :
4302 TRACE_TYPE_HANDLED;
4303 }
4304
4305 int trace_empty(struct trace_iterator *iter)
4306 {
4307 struct ring_buffer_iter *buf_iter;
4308 int cpu;
4309
4310 /* If we are looking at one CPU buffer, only check that one */
4311 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4312 cpu = iter->cpu_file;
4313 buf_iter = trace_buffer_iter(iter, cpu);
4314 if (buf_iter) {
4315 if (!ring_buffer_iter_empty(buf_iter))
4316 return 0;
4317 } else {
4318 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4319 return 0;
4320 }
4321 return 1;
4322 }
4323
4324 for_each_tracing_cpu(cpu) {
4325 buf_iter = trace_buffer_iter(iter, cpu);
4326 if (buf_iter) {
4327 if (!ring_buffer_iter_empty(buf_iter))
4328 return 0;
4329 } else {
4330 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4331 return 0;
4332 }
4333 }
4334
4335 return 1;
4336 }
4337
4338 /* Called with trace_event_read_lock() held. */
4339 enum print_line_t print_trace_line(struct trace_iterator *iter)
4340 {
4341 struct trace_array *tr = iter->tr;
4342 unsigned long trace_flags = tr->trace_flags;
4343 enum print_line_t ret;
4344
4345 if (iter->lost_events) {
4346 if (iter->lost_events == (unsigned long)-1)
4347 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4348 iter->cpu);
4349 else
4350 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4351 iter->cpu, iter->lost_events);
4352 if (trace_seq_has_overflowed(&iter->seq))
4353 return TRACE_TYPE_PARTIAL_LINE;
4354 }
4355
4356 if (iter->trace && iter->trace->print_line) {
4357 ret = iter->trace->print_line(iter);
4358 if (ret != TRACE_TYPE_UNHANDLED)
4359 return ret;
4360 }
4361
4362 if (iter->ent->type == TRACE_BPUTS &&
4363 trace_flags & TRACE_ITER_PRINTK &&
4364 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4365 return trace_print_bputs_msg_only(iter);
4366
4367 if (iter->ent->type == TRACE_BPRINT &&
4368 trace_flags & TRACE_ITER_PRINTK &&
4369 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4370 return trace_print_bprintk_msg_only(iter);
4371
4372 if (iter->ent->type == TRACE_PRINT &&
4373 trace_flags & TRACE_ITER_PRINTK &&
4374 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4375 return trace_print_printk_msg_only(iter);
4376
4377 if (trace_flags & TRACE_ITER_BIN)
4378 return print_bin_fmt(iter);
4379
4380 if (trace_flags & TRACE_ITER_HEX)
4381 return print_hex_fmt(iter);
4382
4383 if (trace_flags & TRACE_ITER_RAW)
4384 return print_raw_fmt(iter);
4385
4386 return print_trace_fmt(iter);
4387 }
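/*
 * Example (illustrative): "echo raw > trace_options" sets
 * TRACE_ITER_RAW and routes output through print_raw_fmt() above;
 * the "hex" and "bin" options select print_hex_fmt() and
 * print_bin_fmt() the same way.
 */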
4388
4389 void trace_latency_header(struct seq_file *m)
4390 {
4391 struct trace_iterator *iter = m->private;
4392 struct trace_array *tr = iter->tr;
4393
4394 /* print nothing if the buffers are empty */
4395 if (trace_empty(iter))
4396 return;
4397
4398 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4399 print_trace_header(m, iter);
4400
4401 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4402 print_lat_help_header(m);
4403 }
4404
4405 void trace_default_header(struct seq_file *m)
4406 {
4407 struct trace_iterator *iter = m->private;
4408 struct trace_array *tr = iter->tr;
4409 unsigned long trace_flags = tr->trace_flags;
4410
4411 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4412 return;
4413
4414 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4415 /* print nothing if the buffers are empty */
4416 if (trace_empty(iter))
4417 return;
4418 print_trace_header(m, iter);
4419 if (!(trace_flags & TRACE_ITER_VERBOSE))
4420 print_lat_help_header(m);
4421 } else {
4422 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4423 if (trace_flags & TRACE_ITER_IRQ_INFO)
4424 print_func_help_header_irq(iter->array_buffer,
4425 m, trace_flags);
4426 else
4427 print_func_help_header(iter->array_buffer, m,
4428 trace_flags);
4429 }
4430 }
4431 }
4432
4433 static void test_ftrace_alive(struct seq_file *m)
4434 {
4435 if (!ftrace_is_dead())
4436 return;
4437 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4438 "# MAY BE MISSING FUNCTION EVENTS\n");
4439 }
4440
4441 #ifdef CONFIG_TRACER_MAX_TRACE
4442 static void show_snapshot_main_help(struct seq_file *m)
4443 {
4444 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4445 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4446 "# Takes a snapshot of the main buffer.\n"
4447 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4448 "# (Doesn't have to be '2' works with any number that\n"
4449 "# is not a '0' or '1')\n");
4450 }
4451
4452 static void show_snapshot_percpu_help(struct seq_file *m)
4453 {
4454 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4455 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4456 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4457 "# Takes a snapshot of the main buffer for this cpu.\n");
4458 #else
4459 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4460 "# Must use main snapshot file to allocate.\n");
4461 #endif
4462 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4463 "# (Doesn't have to be '2' works with any number that\n"
4464 "# is not a '0' or '1')\n");
4465 }
4466
4467 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4468 {
4469 if (iter->tr->allocated_snapshot)
4470 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4471 else
4472 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4473
4474 seq_puts(m, "# Snapshot commands:\n");
4475 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4476 show_snapshot_main_help(m);
4477 else
4478 show_snapshot_percpu_help(m);
4479 }
4480 #else
4481 /* Should never be called */
4482 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4483 #endif
4484
4485 static int s_show(struct seq_file *m, void *v)
4486 {
4487 struct trace_iterator *iter = v;
4488 int ret;
4489
4490 if (iter->ent == NULL) {
4491 if (iter->tr) {
4492 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4493 seq_puts(m, "#\n");
4494 test_ftrace_alive(m);
4495 }
4496 if (iter->snapshot && trace_empty(iter))
4497 print_snapshot_help(m, iter);
4498 else if (iter->trace && iter->trace->print_header)
4499 iter->trace->print_header(m);
4500 else
4501 trace_default_header(m);
4502
4503 } else if (iter->leftover) {
4504 /*
4505 * If we filled the seq_file buffer earlier, we
4506 * want to just show it now.
4507 */
4508 ret = trace_print_seq(m, &iter->seq);
4509
4510 /* ret should this time be zero, but you never know */
4511 iter->leftover = ret;
4512
4513 } else {
4514 ret = print_trace_line(iter);
4515 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4516 iter->seq.full = 0;
4517 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4518 }
4519 ret = trace_print_seq(m, &iter->seq);
4520 /*
4521 * If we overflow the seq_file buffer, then it will
4522 * ask us for this data again at start up.
4523 * Use that instead.
4524 * ret is 0 if seq_file write succeeded.
4525 * -1 otherwise.
4526 */
4527 iter->leftover = ret;
4528 }
4529
4530 return 0;
4531 }
4532
4533 /*
4534 * Should be used after trace_array_get(), trace_types_lock
4535 * ensures that i_cdev was already initialized.
4536 */
4537 static inline int tracing_get_cpu(struct inode *inode)
4538 {
4539 if (inode->i_cdev) /* See trace_create_cpu_file() */
4540 return (long)inode->i_cdev - 1;
4541 return RING_BUFFER_ALL_CPUS;
4542 }
4543
4544 static const struct seq_operations tracer_seq_ops = {
4545 .start = s_start,
4546 .next = s_next,
4547 .stop = s_stop,
4548 .show = s_show,
4549 };
4550
4551 /*
4552 * Note, as iter itself can be allocated and freed in different
4553 * ways, this function is only used to free its content, and not
4554 * the iterator itself. The only requirement for all the allocations
4555 * is that they must zero all fields (kzalloc), as freeing works with
4556 * either allocated content or NULL.
4557 */
4558 static void free_trace_iter_content(struct trace_iterator *iter)
4559 {
4560 /* The fmt is either NULL, allocated or points to static_fmt_buf */
4561 if (iter->fmt != static_fmt_buf)
4562 kfree(iter->fmt);
4563
4564 kfree(iter->temp);
4565 kfree(iter->buffer_iter);
4566 mutex_destroy(&iter->mutex);
4567 free_cpumask_var(iter->started);
4568 }
4569
4570 static struct trace_iterator *
4571 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4572 {
4573 struct trace_array *tr = inode->i_private;
4574 struct trace_iterator *iter;
4575 int cpu;
4576
4577 if (tracing_disabled)
4578 return ERR_PTR(-ENODEV);
4579
4580 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4581 if (!iter)
4582 return ERR_PTR(-ENOMEM);
4583
4584 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4585 GFP_KERNEL);
4586 if (!iter->buffer_iter)
4587 goto release;
4588
4589 /*
4590 * trace_find_next_entry() may need to save off iter->ent.
4591 * It will place it into the iter->temp buffer. As most
4592 * events are less than 128, allocate a buffer of that size.
4593 * If one is greater, then trace_find_next_entry() will
4594 * allocate a new buffer to adjust for the bigger iter->ent.
4595 * It's not critical if it fails to get allocated here.
4596 */
4597 iter->temp = kmalloc(128, GFP_KERNEL);
4598 if (iter->temp)
4599 iter->temp_size = 128;
4600
4601 /*
4602 * trace_event_printf() may need to modify given format
4603 * string to replace %p with %px so that it shows the real address
4604 * instead of a hash value. However, that is only needed for event
4605 * tracing; other tracers may not need it. Defer the allocation
4606 * until it is needed.
4607 */
4608 iter->fmt = NULL;
4609 iter->fmt_size = 0;
4610
4611 mutex_lock(&trace_types_lock);
4612 iter->trace = tr->current_trace;
4613
4614 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4615 goto fail;
4616
4617 iter->tr = tr;
4618
4619 #ifdef CONFIG_TRACER_MAX_TRACE
4620 /* Currently only the top directory has a snapshot */
4621 if (tr->current_trace->print_max || snapshot)
4622 iter->array_buffer = &tr->max_buffer;
4623 else
4624 #endif
4625 iter->array_buffer = &tr->array_buffer;
4626 iter->snapshot = snapshot;
4627 iter->pos = -1;
4628 iter->cpu_file = tracing_get_cpu(inode);
4629 mutex_init(&iter->mutex);
4630
4631 /* Notify the tracer early; before we stop tracing. */
4632 if (iter->trace->open)
4633 iter->trace->open(iter);
4634
4635 /* Annotate start of buffers if we had overruns */
4636 if (ring_buffer_overruns(iter->array_buffer->buffer))
4637 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4638
4639 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4640 if (trace_clocks[tr->clock_id].in_ns)
4641 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4642
4643 /*
4644 * If pause-on-trace is enabled, then stop the trace while
4645 * dumping, unless this is the "snapshot" file
4646 */
4647 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4648 tracing_stop_tr(tr);
4649
4650 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4651 for_each_tracing_cpu(cpu) {
4652 iter->buffer_iter[cpu] =
4653 ring_buffer_read_start(iter->array_buffer->buffer,
4654 cpu, GFP_KERNEL);
4655 tracing_iter_reset(iter, cpu);
4656 }
4657 } else {
4658 cpu = iter->cpu_file;
4659 iter->buffer_iter[cpu] =
4660 ring_buffer_read_start(iter->array_buffer->buffer,
4661 cpu, GFP_KERNEL);
4662 tracing_iter_reset(iter, cpu);
4663 }
4664
4665 mutex_unlock(&trace_types_lock);
4666
4667 return iter;
4668
4669 fail:
4670 mutex_unlock(&trace_types_lock);
4671 free_trace_iter_content(iter);
4672 release:
4673 seq_release_private(inode, file);
4674 return ERR_PTR(-ENOMEM);
4675 }
4676
4677 int tracing_open_generic(struct inode *inode, struct file *filp)
4678 {
4679 int ret;
4680
4681 ret = tracing_check_open_get_tr(NULL);
4682 if (ret)
4683 return ret;
4684
4685 filp->private_data = inode->i_private;
4686 return 0;
4687 }
4688
4689 bool tracing_is_disabled(void)
4690 {
4691 return (tracing_disabled) ? true: false;
4692 }
4693
4694 /*
4695 * Open and update trace_array ref count.
4696 * Must have the current trace_array passed to it.
4697 */
4698 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4699 {
4700 struct trace_array *tr = inode->i_private;
4701 int ret;
4702
4703 ret = tracing_check_open_get_tr(tr);
4704 if (ret)
4705 return ret;
4706
4707 filp->private_data = inode->i_private;
4708
4709 return 0;
4710 }
4711
4712 /*
4713 * The private pointer of the inode is the trace_event_file.
4714 * Update the tr ref count associated to it.
4715 */
4716 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4717 {
4718 struct trace_event_file *file = inode->i_private;
4719 int ret;
4720
4721 ret = tracing_check_open_get_tr(file->tr);
4722 if (ret)
4723 return ret;
4724
4725 mutex_lock(&event_mutex);
4726
4727 /* Fail if the file is marked for removal */
4728 if (file->flags & EVENT_FILE_FL_FREED) {
4729 trace_array_put(file->tr);
4730 ret = -ENODEV;
4731 } else {
4732 event_file_get(file);
4733 }
4734
4735 mutex_unlock(&event_mutex);
4736 if (ret)
4737 return ret;
4738
4739 filp->private_data = inode->i_private;
4740
4741 return 0;
4742 }
4743
4744 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4745 {
4746 struct trace_event_file *file = inode->i_private;
4747
4748 trace_array_put(file->tr);
4749 event_file_put(file);
4750
4751 return 0;
4752 }
4753
4754 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4755 {
4756 tracing_release_file_tr(inode, filp);
4757 return single_release(inode, filp);
4758 }
4759
4760 static int tracing_mark_open(struct inode *inode, struct file *filp)
4761 {
4762 stream_open(inode, filp);
4763 return tracing_open_generic_tr(inode, filp);
4764 }
4765
4766 static int tracing_release(struct inode *inode, struct file *file)
4767 {
4768 struct trace_array *tr = inode->i_private;
4769 struct seq_file *m = file->private_data;
4770 struct trace_iterator *iter;
4771 int cpu;
4772
4773 if (!(file->f_mode & FMODE_READ)) {
4774 trace_array_put(tr);
4775 return 0;
4776 }
4777
4778 /* Writes do not use seq_file */
4779 iter = m->private;
4780 mutex_lock(&trace_types_lock);
4781
4782 for_each_tracing_cpu(cpu) {
4783 if (iter->buffer_iter[cpu])
4784 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4785 }
4786
4787 if (iter->trace && iter->trace->close)
4788 iter->trace->close(iter);
4789
4790 if (!iter->snapshot && tr->stop_count)
4791 /* reenable tracing if it was previously enabled */
4792 tracing_start_tr(tr);
4793
4794 __trace_array_put(tr);
4795
4796 mutex_unlock(&trace_types_lock);
4797
4798 free_trace_iter_content(iter);
4799 seq_release_private(inode, file);
4800
4801 return 0;
4802 }
4803
4804 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4805 {
4806 struct trace_array *tr = inode->i_private;
4807
4808 trace_array_put(tr);
4809 return 0;
4810 }
4811
4812 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4813 {
4814 struct trace_array *tr = inode->i_private;
4815
4816 trace_array_put(tr);
4817
4818 return single_release(inode, file);
4819 }
4820
4821 static int tracing_open(struct inode *inode, struct file *file)
4822 {
4823 struct trace_array *tr = inode->i_private;
4824 struct trace_iterator *iter;
4825 int ret;
4826
4827 ret = tracing_check_open_get_tr(tr);
4828 if (ret)
4829 return ret;
4830
4831 /* If this file was open for write, then erase contents */
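/* (e.g. "echo > trace" opens with O_TRUNC and lands here) */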
4832 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4833 int cpu = tracing_get_cpu(inode);
4834 struct array_buffer *trace_buf = &tr->array_buffer;
4835
4836 #ifdef CONFIG_TRACER_MAX_TRACE
4837 if (tr->current_trace->print_max)
4838 trace_buf = &tr->max_buffer;
4839 #endif
4840
4841 if (cpu == RING_BUFFER_ALL_CPUS)
4842 tracing_reset_online_cpus(trace_buf);
4843 else
4844 tracing_reset_cpu(trace_buf, cpu);
4845 }
4846
4847 if (file->f_mode & FMODE_READ) {
4848 iter = __tracing_open(inode, file, false);
4849 if (IS_ERR(iter))
4850 ret = PTR_ERR(iter);
4851 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4852 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4853 }
4854
4855 if (ret < 0)
4856 trace_array_put(tr);
4857
4858 return ret;
4859 }
4860
4861 /*
4862 * Some tracers are not suitable for instance buffers.
4863 * A tracer is always available for the global array (toplevel)
4864 * or if it explicitly states that it is.
4865 */
4866 static bool
4867 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4868 {
4869 #ifdef CONFIG_TRACER_SNAPSHOT
4870 /* arrays with mapped buffer range do not have snapshots */
4871 if (tr->range_addr_start && t->use_max_tr)
4872 return false;
4873 #endif
4874 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4875 }
4876
4877 /* Find the next tracer that this trace array may use */
4878 static struct tracer *
4879 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4880 {
4881 while (t && !trace_ok_for_array(t, tr))
4882 t = t->next;
4883
4884 return t;
4885 }
4886
4887 static void *
4888 t_next(struct seq_file *m, void *v, loff_t *pos)
4889 {
4890 struct trace_array *tr = m->private;
4891 struct tracer *t = v;
4892
4893 (*pos)++;
4894
4895 if (t)
4896 t = get_tracer_for_array(tr, t->next);
4897
4898 return t;
4899 }
4900
4901 static void *t_start(struct seq_file *m, loff_t *pos)
4902 {
4903 struct trace_array *tr = m->private;
4904 struct tracer *t;
4905 loff_t l = 0;
4906
4907 mutex_lock(&trace_types_lock);
4908
4909 t = get_tracer_for_array(tr, trace_types);
4910 for (; t && l < *pos; t = t_next(m, t, &l))
4911 ;
4912
4913 return t;
4914 }
4915
4916 static void t_stop(struct seq_file *m, void *p)
4917 {
4918 mutex_unlock(&trace_types_lock);
4919 }
4920
4921 static int t_show(struct seq_file *m, void *v)
4922 {
4923 struct tracer *t = v;
4924
4925 if (!t)
4926 return 0;
4927
4928 seq_puts(m, t->name);
4929 if (t->next)
4930 seq_putc(m, ' ');
4931 else
4932 seq_putc(m, '\n');
4933
4934 return 0;
4935 }
4936
4937 static const struct seq_operations show_traces_seq_ops = {
4938 .start = t_start,
4939 .next = t_next,
4940 .stop = t_stop,
4941 .show = t_show,
4942 };
4943
4944 static int show_traces_open(struct inode *inode, struct file *file)
4945 {
4946 struct trace_array *tr = inode->i_private;
4947 struct seq_file *m;
4948 int ret;
4949
4950 ret = tracing_check_open_get_tr(tr);
4951 if (ret)
4952 return ret;
4953
4954 ret = seq_open(file, &show_traces_seq_ops);
4955 if (ret) {
4956 trace_array_put(tr);
4957 return ret;
4958 }
4959
4960 m = file->private_data;
4961 m->private = tr;
4962
4963 return 0;
4964 }
4965
4966 static int tracing_seq_release(struct inode *inode, struct file *file)
4967 {
4968 struct trace_array *tr = inode->i_private;
4969
4970 trace_array_put(tr);
4971 return seq_release(inode, file);
4972 }
4973
4974 static ssize_t
4975 tracing_write_stub(struct file *filp, const char __user *ubuf,
4976 size_t count, loff_t *ppos)
4977 {
4978 return count;
4979 }
4980
4981 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4982 {
4983 int ret;
4984
4985 if (file->f_mode & FMODE_READ)
4986 ret = seq_lseek(file, offset, whence);
4987 else
4988 file->f_pos = ret = 0;
4989
4990 return ret;
4991 }
4992
4993 static const struct file_operations tracing_fops = {
4994 .open = tracing_open,
4995 .read = seq_read,
4996 .read_iter = seq_read_iter,
4997 .splice_read = copy_splice_read,
4998 .write = tracing_write_stub,
4999 .llseek = tracing_lseek,
5000 .release = tracing_release,
5001 };
5002
5003 static const struct file_operations show_traces_fops = {
5004 .open = show_traces_open,
5005 .read = seq_read,
5006 .llseek = seq_lseek,
5007 .release = tracing_seq_release,
5008 };
5009
5010 static ssize_t
5011 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5012 size_t count, loff_t *ppos)
5013 {
5014 struct trace_array *tr = file_inode(filp)->i_private;
5015 char *mask_str;
5016 int len;
5017
5018 len = snprintf(NULL, 0, "%*pb\n",
5019 cpumask_pr_args(tr->tracing_cpumask)) + 1;
5020 mask_str = kmalloc(len, GFP_KERNEL);
5021 if (!mask_str)
5022 return -ENOMEM;
5023
5024 len = snprintf(mask_str, len, "%*pb\n",
5025 cpumask_pr_args(tr->tracing_cpumask));
5026 if (len >= count) {
5027 count = -EINVAL;
5028 goto out_err;
5029 }
5030 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5031
5032 out_err:
5033 kfree(mask_str);
5034
5035 return count;
5036 }
5037
5038 int tracing_set_cpumask(struct trace_array *tr,
5039 cpumask_var_t tracing_cpumask_new)
5040 {
5041 int cpu;
5042
5043 if (!tr)
5044 return -EINVAL;
5045
5046 local_irq_disable();
5047 arch_spin_lock(&tr->max_lock);
5048 for_each_tracing_cpu(cpu) {
5049 /*
5050 * Increase/decrease the disabled counter if we are
5051 * about to flip a bit in the cpumask:
5052 */
5053 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5054 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5055 atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5056 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5057 #ifdef CONFIG_TRACER_MAX_TRACE
5058 ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5059 #endif
5060 }
5061 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5062 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5063 atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5064 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5065 #ifdef CONFIG_TRACER_MAX_TRACE
5066 ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5067 #endif
5068 }
5069 }
5070 arch_spin_unlock(&tr->max_lock);
5071 local_irq_enable();
5072
5073 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5074
5075 return 0;
5076 }
5077
5078 static ssize_t
5079 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5080 size_t count, loff_t *ppos)
5081 {
5082 struct trace_array *tr = file_inode(filp)->i_private;
5083 cpumask_var_t tracing_cpumask_new;
5084 int err;
5085
5086 if (count == 0 || count > KMALLOC_MAX_SIZE)
5087 return -EINVAL;
5088
5089 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5090 return -ENOMEM;
5091
5092 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5093 if (err)
5094 goto err_free;
5095
5096 err = tracing_set_cpumask(tr, tracing_cpumask_new);
5097 if (err)
5098 goto err_free;
5099
5100 free_cpumask_var(tracing_cpumask_new);
5101
5102 return count;
5103
5104 err_free:
5105 free_cpumask_var(tracing_cpumask_new);
5106
5107 return err;
5108 }
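/*
 * Usage sketch (illustrative): the mask is parsed by
 * cpumask_parse_user(), so "echo 3 > tracing_cpumask" restricts
 * tracing to CPUs 0 and 1, and reading the file back shows the
 * current mask in the same hex format.
 */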
5109
5110 static const struct file_operations tracing_cpumask_fops = {
5111 .open = tracing_open_generic_tr,
5112 .read = tracing_cpumask_read,
5113 .write = tracing_cpumask_write,
5114 .release = tracing_release_generic_tr,
5115 .llseek = generic_file_llseek,
5116 };
5117
5118 static int tracing_trace_options_show(struct seq_file *m, void *v)
5119 {
5120 struct tracer_opt *trace_opts;
5121 struct trace_array *tr = m->private;
5122 u32 tracer_flags;
5123 int i;
5124
5125 guard(mutex)(&trace_types_lock);
5126
5127 tracer_flags = tr->current_trace->flags->val;
5128 trace_opts = tr->current_trace->flags->opts;
5129
5130 for (i = 0; trace_options[i]; i++) {
5131 if (tr->trace_flags & (1 << i))
5132 seq_printf(m, "%s\n", trace_options[i]);
5133 else
5134 seq_printf(m, "no%s\n", trace_options[i]);
5135 }
5136
5137 for (i = 0; trace_opts[i].name; i++) {
5138 if (tracer_flags & trace_opts[i].bit)
5139 seq_printf(m, "%s\n", trace_opts[i].name);
5140 else
5141 seq_printf(m, "no%s\n", trace_opts[i].name);
5142 }
5143
5144 return 0;
5145 }
5146
5147 static int __set_tracer_option(struct trace_array *tr,
5148 struct tracer_flags *tracer_flags,
5149 struct tracer_opt *opts, int neg)
5150 {
5151 struct tracer *trace = tracer_flags->trace;
5152 int ret;
5153
5154 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5155 if (ret)
5156 return ret;
5157
5158 if (neg)
5159 tracer_flags->val &= ~opts->bit;
5160 else
5161 tracer_flags->val |= opts->bit;
5162 return 0;
5163 }
5164
5165 /* Try to assign a tracer specific option */
5166 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5167 {
5168 struct tracer *trace = tr->current_trace;
5169 struct tracer_flags *tracer_flags = trace->flags;
5170 struct tracer_opt *opts = NULL;
5171 int i;
5172
5173 for (i = 0; tracer_flags->opts[i].name; i++) {
5174 opts = &tracer_flags->opts[i];
5175
5176 if (strcmp(cmp, opts->name) == 0)
5177 return __set_tracer_option(tr, trace->flags, opts, neg);
5178 }
5179
5180 return -EINVAL;
5181 }
5182
5183 /* Some tracers require overwrite to stay enabled */
5184 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5185 {
5186 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5187 return -1;
5188
5189 return 0;
5190 }
5191
5192 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5193 {
5194 if ((mask == TRACE_ITER_RECORD_TGID) ||
5195 (mask == TRACE_ITER_RECORD_CMD) ||
5196 (mask == TRACE_ITER_TRACE_PRINTK))
5197 lockdep_assert_held(&event_mutex);
5198
5199 /* do nothing if flag is already set */
5200 if (!!(tr->trace_flags & mask) == !!enabled)
5201 return 0;
5202
5203 /* Give the tracer a chance to approve the change */
5204 if (tr->current_trace->flag_changed)
5205 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5206 return -EINVAL;
5207
5208 if (mask == TRACE_ITER_TRACE_PRINTK) {
5209 if (enabled) {
5210 update_printk_trace(tr);
5211 } else {
5212 /*
5213 * The global_trace cannot clear this.
5214 * Its flag only gets cleared if another instance sets it.
5215 */
5216 if (printk_trace == &global_trace)
5217 return -EINVAL;
5218 /*
5219 * An instance must always have it set;
5220 * by default, that's the global_trace instance.
5221 */
5222 if (printk_trace == tr)
5223 update_printk_trace(&global_trace);
5224 }
5225 }
5226
5227 if (enabled)
5228 tr->trace_flags |= mask;
5229 else
5230 tr->trace_flags &= ~mask;
5231
5232 if (mask == TRACE_ITER_RECORD_CMD)
5233 trace_event_enable_cmd_record(enabled);
5234
5235 if (mask == TRACE_ITER_RECORD_TGID) {
5236
5237 if (trace_alloc_tgid_map() < 0) {
5238 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5239 return -ENOMEM;
5240 }
5241
5242 trace_event_enable_tgid_record(enabled);
5243 }
5244
5245 if (mask == TRACE_ITER_EVENT_FORK)
5246 trace_event_follow_fork(tr, enabled);
5247
5248 if (mask == TRACE_ITER_FUNC_FORK)
5249 ftrace_pid_follow_fork(tr, enabled);
5250
5251 if (mask == TRACE_ITER_OVERWRITE) {
5252 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5253 #ifdef CONFIG_TRACER_MAX_TRACE
5254 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5255 #endif
5256 }
5257
5258 if (mask == TRACE_ITER_PRINTK) {
5259 trace_printk_start_stop_comm(enabled);
5260 trace_printk_control(enabled);
5261 }
5262
5263 return 0;
5264 }
5265
5266 int trace_set_options(struct trace_array *tr, char *option)
5267 {
5268 char *cmp;
5269 int neg = 0;
5270 int ret;
5271 size_t orig_len = strlen(option);
5272 int len;
5273
5274 cmp = strstrip(option);
5275
5276 len = str_has_prefix(cmp, "no");
5277 if (len)
5278 neg = 1;
5279
5280 cmp += len;
5281
5282 mutex_lock(&event_mutex);
5283 mutex_lock(&trace_types_lock);
5284
5285 ret = match_string(trace_options, -1, cmp);
5286 /* If no option could be set, test the specific tracer options */
5287 if (ret < 0)
5288 ret = set_tracer_option(tr, cmp, neg);
5289 else
5290 ret = set_tracer_flag(tr, 1 << ret, !neg);
5291
5292 mutex_unlock(&trace_types_lock);
5293 mutex_unlock(&event_mutex);
5294
5295 /*
5296 * If the first trailing whitespace is replaced with '\0' by strstrip,
5297 * turn it back into a space.
5298 */
5299 if (orig_len > strlen(option))
5300 option[strlen(option)] = ' ';
5301
5302 return ret;
5303 }
5304
5305 static void __init apply_trace_boot_options(void)
5306 {
5307 char *buf = trace_boot_options_buf;
5308 char *option;
5309
5310 while (true) {
5311 option = strsep(&buf, ",");
5312
5313 if (!option)
5314 break;
5315
5316 if (*option)
5317 trace_set_options(&global_trace, option);
5318
5319 /* Put back the comma to allow this to be called again */
5320 if (buf)
5321 *(buf - 1) = ',';
5322 }
5323 }
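/*
 * Example (illustrative): booting with
 *
 *	trace_options=sym-offset,noirq-info
 *
 * fills trace_boot_options_buf, and the loop above hands "sym-offset"
 * and then "noirq-info" to trace_set_options(); the "no" prefix clears
 * the corresponding flag (here TRACE_ITER_IRQ_INFO).
 */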
5324
5325 static ssize_t
5326 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5327 size_t cnt, loff_t *ppos)
5328 {
5329 struct seq_file *m = filp->private_data;
5330 struct trace_array *tr = m->private;
5331 char buf[64];
5332 int ret;
5333
5334 if (cnt >= sizeof(buf))
5335 return -EINVAL;
5336
5337 if (copy_from_user(buf, ubuf, cnt))
5338 return -EFAULT;
5339
5340 buf[cnt] = 0;
5341
5342 ret = trace_set_options(tr, buf);
5343 if (ret < 0)
5344 return ret;
5345
5346 *ppos += cnt;
5347
5348 return cnt;
5349 }
5350
5351 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5352 {
5353 struct trace_array *tr = inode->i_private;
5354 int ret;
5355
5356 ret = tracing_check_open_get_tr(tr);
5357 if (ret)
5358 return ret;
5359
5360 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5361 if (ret < 0)
5362 trace_array_put(tr);
5363
5364 return ret;
5365 }
5366
5367 static const struct file_operations tracing_iter_fops = {
5368 .open = tracing_trace_options_open,
5369 .read = seq_read,
5370 .llseek = seq_lseek,
5371 .release = tracing_single_release_tr,
5372 .write = tracing_trace_options_write,
5373 };
5374
5375 static const char readme_msg[] =
5376 "tracing mini-HOWTO:\n\n"
5377 "By default tracefs removes all OTH file permission bits.\n"
5378 "When mounting tracefs an optional group id can be specified\n"
5379 "which adds the group to every directory and file in tracefs:\n\n"
5380 "\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5381 "# echo 0 > tracing_on : quick way to disable tracing\n"
5382 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5383 " Important files:\n"
5384 " trace\t\t\t- The static contents of the buffer\n"
5385 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5386 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5387 " current_tracer\t- function and latency tracers\n"
5388 " available_tracers\t- list of configured tracers for current_tracer\n"
5389 " error_log\t- error log for failed commands (that support it)\n"
5390 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5391 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5392 " trace_clock\t\t- change the clock used to order events\n"
5393 " local: Per cpu clock but may not be synced across CPUs\n"
5394 " global: Synced across CPUs but slows tracing down.\n"
5395 " counter: Not a clock, but just an increment\n"
5396 " uptime: Jiffy counter from time of boot\n"
5397 " perf: Same clock that perf events use\n"
5398 #ifdef CONFIG_X86_64
5399 " x86-tsc: TSC cycle counter\n"
5400 #endif
5401 "\n timestamp_mode\t- view the mode used to timestamp events\n"
5402 " delta: Delta difference against a buffer-wide timestamp\n"
5403 " absolute: Absolute (standalone) timestamp\n"
5404 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5405 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5406 " tracing_cpumask\t- Limit which CPUs to trace\n"
5407 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5408 "\t\t\t Remove sub-buffer with rmdir\n"
5409 " trace_options\t\t- Set format or modify how tracing happens\n"
5410 "\t\t\t Disable an option by prefixing 'no' to the\n"
5411 "\t\t\t option name\n"
5412 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5413 #ifdef CONFIG_DYNAMIC_FTRACE
5414 "\n available_filter_functions - list of functions that can be filtered on\n"
5415 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5416 "\t\t\t functions\n"
5417 "\t accepts: func_full_name or glob-matching-pattern\n"
5418 "\t modules: Can select a group via module\n"
5419 "\t Format: :mod:<module-name>\n"
5420 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5421 "\t triggers: a command to perform when function is hit\n"
5422 "\t Format: <function>:<trigger>[:count]\n"
5423 "\t trigger: traceon, traceoff\n"
5424 "\t\t enable_event:<system>:<event>\n"
5425 "\t\t disable_event:<system>:<event>\n"
5426 #ifdef CONFIG_STACKTRACE
5427 "\t\t stacktrace\n"
5428 #endif
5429 #ifdef CONFIG_TRACER_SNAPSHOT
5430 "\t\t snapshot\n"
5431 #endif
5432 "\t\t dump\n"
5433 "\t\t cpudump\n"
5434 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5435 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5436 "\t The first one will disable tracing every time do_fault is hit\n"
5437 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5438 "\t The first time do trap is hit and it disables tracing, the\n"
5439 "\t counter will decrement to 2. If tracing is already disabled,\n"
5440 "\t the counter will not decrement. It only decrements when the\n"
5441 "\t trigger did work\n"
5442 "\t To remove trigger without count:\n"
5443 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
5444 "\t To remove trigger with a count:\n"
5445 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5446 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5447 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5448 "\t modules: Can select a group via module command :mod:\n"
5449 "\t Does not accept triggers\n"
5450 #endif /* CONFIG_DYNAMIC_FTRACE */
5451 #ifdef CONFIG_FUNCTION_TRACER
5452 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5453 "\t\t (function)\n"
5454 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5455 "\t\t (function)\n"
5456 #endif
5457 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5458 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5459 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5460 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5461 #endif
5462 #ifdef CONFIG_TRACER_SNAPSHOT
5463 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5464 "\t\t\t snapshot buffer. Read the contents for more\n"
5465 "\t\t\t information\n"
5466 #endif
5467 #ifdef CONFIG_STACK_TRACER
5468 " stack_trace\t\t- Shows the max stack trace when active\n"
5469 " stack_max_size\t- Shows current max stack size that was traced\n"
5470 "\t\t\t Write into this file to reset the max size (trigger a\n"
5471 "\t\t\t new trace)\n"
5472 #ifdef CONFIG_DYNAMIC_FTRACE
5473 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5474 "\t\t\t traces\n"
5475 #endif
5476 #endif /* CONFIG_STACK_TRACER */
5477 #ifdef CONFIG_DYNAMIC_EVENTS
5478 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5479 "\t\t\t Write into this file to define/undefine new trace events.\n"
5480 #endif
5481 #ifdef CONFIG_KPROBE_EVENTS
5482 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5483 "\t\t\t Write into this file to define/undefine new trace events.\n"
5484 #endif
5485 #ifdef CONFIG_UPROBE_EVENTS
5486 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5487 "\t\t\t Write into this file to define/undefine new trace events.\n"
5488 #endif
5489 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5490 defined(CONFIG_FPROBE_EVENTS)
5491 "\t accepts: event-definitions (one definition per line)\n"
5492 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5493 "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5494 "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5495 #endif
5496 #ifdef CONFIG_FPROBE_EVENTS
5497 "\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5498 "\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5499 #endif
5500 #ifdef CONFIG_HIST_TRIGGERS
5501 "\t s:[synthetic/]<event> <field> [<field>]\n"
5502 #endif
5503 "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5504 "\t -:[<group>/][<event>]\n"
5505 #ifdef CONFIG_KPROBE_EVENTS
5506 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5507 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5508 #endif
5509 #ifdef CONFIG_UPROBE_EVENTS
5510 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5511 #endif
5512 "\t args: <name>=fetcharg[:type]\n"
5513 "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5514 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5515 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5516 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5517 "\t <argname>[->field[->field|.field...]],\n"
5518 #endif
5519 #else
5520 "\t $stack<index>, $stack, $retval, $comm,\n"
5521 #endif
5522 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5523 "\t kernel return probes support: $retval, $arg<N>, $comm\n"
5524 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5525 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5526 "\t symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5527 #ifdef CONFIG_HIST_TRIGGERS
5528 "\t field: <stype> <name>;\n"
5529 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5530 "\t [unsigned] char/int/long\n"
5531 #endif
5532 "\t efield: For event probes ('e' types), the field is on of the fields\n"
5533 "\t of the <attached-group>/<attached-event>.\n"
5534 #endif
5535 " events/\t\t- Directory containing all trace event subsystems:\n"
5536 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5537 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5538 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5539 "\t\t\t events\n"
5540 " filter\t\t- If set, only events passing filter are traced\n"
5541 " events/<system>/<event>/\t- Directory containing control files for\n"
5542 "\t\t\t <event>:\n"
5543 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5544 " filter\t\t- If set, only events passing filter are traced\n"
5545 " trigger\t\t- If set, a command to perform when event is hit\n"
5546 "\t Format: <trigger>[:count][if <filter>]\n"
5547 "\t trigger: traceon, traceoff\n"
5548 "\t enable_event:<system>:<event>\n"
5549 "\t disable_event:<system>:<event>\n"
5550 #ifdef CONFIG_HIST_TRIGGERS
5551 "\t enable_hist:<system>:<event>\n"
5552 "\t disable_hist:<system>:<event>\n"
5553 #endif
5554 #ifdef CONFIG_STACKTRACE
5555 "\t\t stacktrace\n"
5556 #endif
5557 #ifdef CONFIG_TRACER_SNAPSHOT
5558 "\t\t snapshot\n"
5559 #endif
5560 #ifdef CONFIG_HIST_TRIGGERS
5561 "\t\t hist (see below)\n"
5562 #endif
5563 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5564 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5565 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5566 "\t events/block/block_unplug/trigger\n"
5567 "\t The first disables tracing every time block_unplug is hit.\n"
5568 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5569 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5570 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5571 "\t Like function triggers, the counter is only decremented if it\n"
5572 "\t enabled or disabled tracing.\n"
5573 "\t To remove a trigger without a count:\n"
5574 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5575 "\t To remove a trigger with a count:\n"
5576 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5577 "\t Filters can be ignored when removing a trigger.\n"
5578 #ifdef CONFIG_HIST_TRIGGERS
5579 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5580 "\t Format: hist:keys=<field1[,field2,...]>\n"
5581 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5582 "\t [:values=<field1[,field2,...]>]\n"
5583 "\t [:sort=<field1[,field2,...]>]\n"
5584 "\t [:size=#entries]\n"
5585 "\t [:pause][:continue][:clear]\n"
5586 "\t [:name=histname1]\n"
5587 "\t [:nohitcount]\n"
5588 "\t [:<handler>.<action>]\n"
5589 "\t [if <filter>]\n\n"
5590 "\t Note, special fields can be used as well:\n"
5591 "\t common_timestamp - to record current timestamp\n"
5592 "\t common_cpu - to record the CPU the event happened on\n"
5593 "\n"
5594 "\t A hist trigger variable can be:\n"
5595 "\t - a reference to a field e.g. x=current_timestamp,\n"
5596 "\t - a reference to another variable e.g. y=$x,\n"
5597 "\t - a numeric literal: e.g. ms_per_sec=1000,\n"
5598 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5599 "\n"
5600 "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5601 "\t multiplication(*) and division(/) operators. An operand can be either a\n"
5602 "\t variable reference, field or numeric literal.\n"
5603 "\n"
5604 "\t When a matching event is hit, an entry is added to a hash\n"
5605 "\t table using the key(s) and value(s) named, and the value of a\n"
5606 "\t sum called 'hitcount' is incremented. Keys and values\n"
5607 "\t correspond to fields in the event's format description. Keys\n"
5608 "\t can be any field, or the special string 'common_stacktrace'.\n"
5609 "\t Compound keys consisting of up to two fields can be specified\n"
5610 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5611 "\t fields. Sort keys consisting of up to two fields can be\n"
5612 "\t specified using the 'sort' keyword. The sort direction can\n"
5613 "\t be modified by appending '.descending' or '.ascending' to a\n"
5614 "\t sort field. The 'size' parameter can be used to specify more\n"
5615 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5616 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5617 "\t its histogram data will be shared with other triggers of the\n"
5618 "\t same name, and trigger hits will update this common data.\n\n"
5619 "\t Reading the 'hist' file for the event will dump the hash\n"
5620 "\t table in its entirety to stdout. If there are multiple hist\n"
5621 "\t triggers attached to an event, there will be a table for each\n"
5622 "\t trigger in the output. The table displayed for a named\n"
5623 "\t trigger will be the same as any other instance having the\n"
5624 "\t same name. The default format used to display a given field\n"
5625 "\t can be modified by appending any of the following modifiers\n"
5626 "\t to the field name, as applicable:\n\n"
5627 "\t .hex display a number as a hex value\n"
5628 "\t .sym display an address as a symbol\n"
5629 "\t .sym-offset display an address as a symbol and offset\n"
5630 "\t .execname display a common_pid as a program name\n"
5631 "\t .syscall display a syscall id as a syscall name\n"
5632 "\t .log2 display log2 value rather than raw number\n"
5633 "\t .buckets=size display values in groups of size rather than raw number\n"
5634 "\t .usecs display a common_timestamp in microseconds\n"
5635 "\t .percent display a number of percentage value\n"
5636 "\t .graph display a bar-graph of a value\n\n"
5637 "\t The 'pause' parameter can be used to pause an existing hist\n"
5638 "\t trigger or to start a hist trigger but not log any events\n"
5639 "\t until told to do so. 'continue' can be used to start or\n"
5640 "\t restart a paused hist trigger.\n\n"
5641 "\t The 'clear' parameter will clear the contents of a running\n"
5642 "\t hist trigger and leave its current paused/active state\n"
5643 "\t unchanged.\n\n"
5644 "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5645 "\t raw hitcount in the histogram.\n\n"
5646 "\t The enable_hist and disable_hist triggers can be used to\n"
5647 "\t have one event conditionally start and stop another event's\n"
5648 "\t already-attached hist trigger. The syntax is analogous to\n"
5649 "\t the enable_event and disable_event triggers.\n\n"
5650 "\t Hist trigger handlers and actions are executed whenever a\n"
5651 "\t a histogram entry is added or updated. They take the form:\n\n"
5652 "\t <handler>.<action>\n\n"
5653 "\t The available handlers are:\n\n"
5654 "\t onmatch(matching.event) - invoke on addition or update\n"
5655 "\t onmax(var) - invoke if var exceeds current max\n"
5656 "\t onchange(var) - invoke action if var changes\n\n"
5657 "\t The available actions are:\n\n"
5658 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5659 "\t save(field,...) - save current event fields\n"
5660 #ifdef CONFIG_TRACER_SNAPSHOT
5661 "\t snapshot() - snapshot the trace buffer\n\n"
5662 #endif
5663 #ifdef CONFIG_SYNTH_EVENTS
5664 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5665 "\t Write into this file to define/undefine new synthetic events.\n"
5666 "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5667 #endif
5668 #endif
5669 ;
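/*
 * Illustrative use of the hist trigger syntax documented above (the
 * event and field names here are only an example; any event with
 * numeric fields works the same way):
 *
 *   # echo 'hist:keys=common_pid.execname:values=bytes_req:sort=bytes_req.descending' \
 *         > events/kmem/kmalloc/trigger
 *   # cat events/kmem/kmalloc/hist
 */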
5670
5671 static ssize_t
5672 tracing_readme_read(struct file *filp, char __user *ubuf,
5673 size_t cnt, loff_t *ppos)
5674 {
5675 return simple_read_from_buffer(ubuf, cnt, ppos,
5676 readme_msg, strlen(readme_msg));
5677 }
5678
5679 static const struct file_operations tracing_readme_fops = {
5680 .open = tracing_open_generic,
5681 .read = tracing_readme_read,
5682 .llseek = generic_file_llseek,
5683 };
5684
5685 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5686 static union trace_eval_map_item *
5687 update_eval_map(union trace_eval_map_item *ptr)
5688 {
5689 if (!ptr->map.eval_string) {
5690 if (ptr->tail.next) {
5691 ptr = ptr->tail.next;
5692 /* Set ptr to the next real item (skip head) */
5693 ptr++;
5694 } else
5695 return NULL;
5696 }
5697 return ptr;
5698 }
5699
5700 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5701 {
5702 union trace_eval_map_item *ptr = v;
5703
5704 /*
5705 * Paranoid! If ptr points to end, we don't want to increment past it.
5706 * This really should never happen.
5707 */
5708 (*pos)++;
5709 ptr = update_eval_map(ptr);
5710 if (WARN_ON_ONCE(!ptr))
5711 return NULL;
5712
5713 ptr++;
5714 ptr = update_eval_map(ptr);
5715
5716 return ptr;
5717 }
5718
5719 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5720 {
5721 union trace_eval_map_item *v;
5722 loff_t l = 0;
5723
5724 mutex_lock(&trace_eval_mutex);
5725
5726 v = trace_eval_maps;
5727 if (v)
5728 v++;
5729
5730 while (v && l < *pos) {
5731 v = eval_map_next(m, v, &l);
5732 }
5733
5734 return v;
5735 }
5736
5737 static void eval_map_stop(struct seq_file *m, void *v)
5738 {
5739 mutex_unlock(&trace_eval_mutex);
5740 }
5741
5742 static int eval_map_show(struct seq_file *m, void *v)
5743 {
5744 union trace_eval_map_item *ptr = v;
5745
5746 seq_printf(m, "%s %ld (%s)\n",
5747 ptr->map.eval_string, ptr->map.eval_value,
5748 ptr->map.system);
5749
5750 return 0;
5751 }
5752
5753 static const struct seq_operations tracing_eval_map_seq_ops = {
5754 .start = eval_map_start,
5755 .next = eval_map_next,
5756 .stop = eval_map_stop,
5757 .show = eval_map_show,
5758 };
5759
5760 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5761 {
5762 int ret;
5763
5764 ret = tracing_check_open_get_tr(NULL);
5765 if (ret)
5766 return ret;
5767
5768 return seq_open(filp, &tracing_eval_map_seq_ops);
5769 }
5770
5771 static const struct file_operations tracing_eval_map_fops = {
5772 .open = tracing_eval_map_open,
5773 .read = seq_read,
5774 .llseek = seq_lseek,
5775 .release = seq_release,
5776 };
5777
5778 static inline union trace_eval_map_item *
5779 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5780 {
5781 /* Return tail of array given the head */
5782 return ptr + ptr->head.length + 1;
5783 }
5784
5785 static void
5786 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5787 int len)
5788 {
5789 struct trace_eval_map **stop;
5790 struct trace_eval_map **map;
5791 union trace_eval_map_item *map_array;
5792 union trace_eval_map_item *ptr;
5793
5794 stop = start + len;
5795
5796 /*
5797 * The trace_eval_maps contains the map plus a head and tail item,
5798 * where the head holds the module and length of array, and the
5799 * tail holds a pointer to the next list.
5800 */
5801 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5802 if (!map_array) {
5803 pr_warn("Unable to allocate trace eval mapping\n");
5804 return;
5805 }
5806
5807 guard(mutex)(&trace_eval_mutex);
5808
5809 if (!trace_eval_maps)
5810 trace_eval_maps = map_array;
5811 else {
5812 ptr = trace_eval_maps;
5813 for (;;) {
5814 ptr = trace_eval_jmp_to_tail(ptr);
5815 if (!ptr->tail.next)
5816 break;
5817 ptr = ptr->tail.next;
5818
5819 }
5820 ptr->tail.next = map_array;
5821 }
5822 map_array->head.mod = mod;
5823 map_array->head.length = len;
5824 map_array++;
5825
5826 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5827 map_array->map = **map;
5828 map_array++;
5829 }
5830 memset(map_array, 0, sizeof(*map_array));
5831 }
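/*
 * For illustration, a module contributing three eval maps (the names
 * below are hypothetical) is laid out by the function above as:
 *
 *   map_array[0]  head { .mod = mod, .length = 3 }
 *   map_array[1]  map  { "STATE_A", 0, "my_subsys" }
 *   map_array[2]  map  { "STATE_B", 1, "my_subsys" }
 *   map_array[3]  map  { "STATE_C", 2, "my_subsys" }
 *   map_array[4]  zeroed terminator, used as the tail { .next = NULL }
 *
 * trace_eval_jmp_to_tail() skips from the head straight to that final
 * tail slot (head + length + 1).
 */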
5832
5833 static void trace_create_eval_file(struct dentry *d_tracer)
5834 {
5835 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5836 NULL, &tracing_eval_map_fops);
5837 }
5838
5839 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5840 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5841 static inline void trace_insert_eval_map_file(struct module *mod,
5842 struct trace_eval_map **start, int len) { }
5843 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5844
5845 static void trace_insert_eval_map(struct module *mod,
5846 struct trace_eval_map **start, int len)
5847 {
5848 struct trace_eval_map **map;
5849
5850 if (len <= 0)
5851 return;
5852
5853 map = start;
5854
5855 trace_event_eval_update(map, len);
5856
5857 trace_insert_eval_map_file(mod, start, len);
5858 }
5859
5860 static ssize_t
5861 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5862 size_t cnt, loff_t *ppos)
5863 {
5864 struct trace_array *tr = filp->private_data;
5865 char buf[MAX_TRACER_SIZE+2];
5866 int r;
5867
5868 mutex_lock(&trace_types_lock);
5869 r = sprintf(buf, "%s\n", tr->current_trace->name);
5870 mutex_unlock(&trace_types_lock);
5871
5872 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5873 }
5874
5875 int tracer_init(struct tracer *t, struct trace_array *tr)
5876 {
5877 tracing_reset_online_cpus(&tr->array_buffer);
5878 return t->init(tr);
5879 }
5880
5881 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5882 {
5883 int cpu;
5884
5885 for_each_tracing_cpu(cpu)
5886 per_cpu_ptr(buf->data, cpu)->entries = val;
5887 }
5888
5889 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5890 {
5891 if (cpu == RING_BUFFER_ALL_CPUS) {
5892 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5893 } else {
5894 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5895 }
5896 }
5897
5898 #ifdef CONFIG_TRACER_MAX_TRACE
5899 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5900 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5901 struct array_buffer *size_buf, int cpu_id)
5902 {
5903 int cpu, ret = 0;
5904
5905 if (cpu_id == RING_BUFFER_ALL_CPUS) {
5906 for_each_tracing_cpu(cpu) {
5907 ret = ring_buffer_resize(trace_buf->buffer,
5908 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5909 if (ret < 0)
5910 break;
5911 per_cpu_ptr(trace_buf->data, cpu)->entries =
5912 per_cpu_ptr(size_buf->data, cpu)->entries;
5913 }
5914 } else {
5915 ret = ring_buffer_resize(trace_buf->buffer,
5916 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5917 if (ret == 0)
5918 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5919 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5920 }
5921
5922 return ret;
5923 }
5924 #endif /* CONFIG_TRACER_MAX_TRACE */
5925
5926 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5927 unsigned long size, int cpu)
5928 {
5929 int ret;
5930
5931 /*
5932 * If kernel or user changes the size of the ring buffer
5933 * we use the size that was given, and we can forget about
5934 * expanding it later.
5935 */
5936 trace_set_ring_buffer_expanded(tr);
5937
5938 /* May be called before buffers are initialized */
5939 if (!tr->array_buffer.buffer)
5940 return 0;
5941
5942 /* Do not allow tracing while resizing ring buffer */
5943 tracing_stop_tr(tr);
5944
5945 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5946 if (ret < 0)
5947 goto out_start;
5948
5949 #ifdef CONFIG_TRACER_MAX_TRACE
5950 if (!tr->allocated_snapshot)
5951 goto out;
5952
5953 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5954 if (ret < 0) {
5955 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5956 &tr->array_buffer, cpu);
5957 if (r < 0) {
5958 /*
5959 * AARGH! We are left with different
5960 * size max buffer!!!!
5961 * The max buffer is our "snapshot" buffer.
5962 * When a tracer needs a snapshot (one of the
5963 * latency tracers), it swaps the max buffer
5964 * with the saved snapshot. We succeeded in updating
5965 * the size of the main buffer, but failed to
5966 * update the size of the max buffer. But when we tried
5967 * to reset the main buffer to the original size, we
5968 * failed there too. This is very unlikely to
5969 * happen, but if it does, warn and kill all
5970 * tracing.
5971 */
5972 WARN_ON(1);
5973 tracing_disabled = 1;
5974 }
5975 goto out_start;
5976 }
5977
5978 update_buffer_entries(&tr->max_buffer, cpu);
5979
5980 out:
5981 #endif /* CONFIG_TRACER_MAX_TRACE */
5982
5983 update_buffer_entries(&tr->array_buffer, cpu);
5984 out_start:
5985 tracing_start_tr(tr);
5986 return ret;
5987 }
5988
5989 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5990 unsigned long size, int cpu_id)
5991 {
5992 guard(mutex)(&trace_types_lock);
5993
5994 if (cpu_id != RING_BUFFER_ALL_CPUS) {
5995 /* make sure, this cpu is enabled in the mask */
5996 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
5997 return -EINVAL;
5998 }
5999
6000 return __tracing_resize_ring_buffer(tr, size, cpu_id);
6001 }
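/*
 * From user space this resize path is reached through buffer_size_kb,
 * e.g. (illustrative):
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb          # all CPUs
 *   # echo 4096 > /sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb
 *
 * The value is KiB per CPU; tracing_entries_write() further down
 * converts it to bytes and calls tracing_resize_ring_buffer().
 */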
6002
6003 static void update_last_data(struct trace_array *tr)
6004 {
6005 if (!tr->text_delta && !tr->data_delta)
6006 return;
6007
6008 /*
6009 * Need to clear all CPU buffers as there cannot be events
6010 * from the previous boot mixed with events from this boot
6011 * as that will cause a confusing trace. Need to clear all
6012 * CPU buffers, even for those that may currently be offline.
6013 */
6014 tracing_reset_all_cpus(&tr->array_buffer);
6015
6016 /* Using current data now */
6017 tr->text_delta = 0;
6018 tr->data_delta = 0;
6019 }
6020
6021 /**
6022 * tracing_update_buffers - used by tracing facility to expand ring buffers
6023 * @tr: The tracing instance
6024 *
6025 * To save memory when tracing is configured in but never used, the
6026 * ring buffers are initially set to a minimum size. Once a user
6027 * starts to use the tracing facility, they need to grow to their
6028 * default size.
6029 *
6030 * This function is to be called when a tracer is about to be used.
6031 */
6032 int tracing_update_buffers(struct trace_array *tr)
6033 {
6034 int ret = 0;
6035
6036 mutex_lock(&trace_types_lock);
6037
6038 update_last_data(tr);
6039
6040 if (!tr->ring_buffer_expanded)
6041 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6042 RING_BUFFER_ALL_CPUS);
6043 mutex_unlock(&trace_types_lock);
6044
6045 return ret;
6046 }
6047
6048 struct trace_option_dentry;
6049
6050 static void
6051 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6052
6053 /*
6054 * Used to clear out the tracer before deletion of an instance.
6055 * Must have trace_types_lock held.
6056 */
6057 static void tracing_set_nop(struct trace_array *tr)
6058 {
6059 if (tr->current_trace == &nop_trace)
6060 return;
6061
6062 tr->current_trace->enabled--;
6063
6064 if (tr->current_trace->reset)
6065 tr->current_trace->reset(tr);
6066
6067 tr->current_trace = &nop_trace;
6068 }
6069
6070 static bool tracer_options_updated;
6071
6072 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6073 {
6074 /* Only enable if the directory has been created already. */
6075 if (!tr->dir)
6076 return;
6077
6078 /* Only create trace option files after update_tracer_options finishes */
6079 if (!tracer_options_updated)
6080 return;
6081
6082 create_trace_option_files(tr, t);
6083 }
6084
6085 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6086 {
6087 struct tracer *t;
6088 #ifdef CONFIG_TRACER_MAX_TRACE
6089 bool had_max_tr;
6090 #endif
6091 int ret;
6092
6093 guard(mutex)(&trace_types_lock);
6094
6095 update_last_data(tr);
6096
6097 if (!tr->ring_buffer_expanded) {
6098 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6099 RING_BUFFER_ALL_CPUS);
6100 if (ret < 0)
6101 return ret;
6102 ret = 0;
6103 }
6104
6105 for (t = trace_types; t; t = t->next) {
6106 if (strcmp(t->name, buf) == 0)
6107 break;
6108 }
6109 if (!t)
6110 return -EINVAL;
6111
6112 if (t == tr->current_trace)
6113 return 0;
6114
6115 #ifdef CONFIG_TRACER_SNAPSHOT
6116 if (t->use_max_tr) {
6117 local_irq_disable();
6118 arch_spin_lock(&tr->max_lock);
6119 ret = tr->cond_snapshot ? -EBUSY : 0;
6120 arch_spin_unlock(&tr->max_lock);
6121 local_irq_enable();
6122 if (ret)
6123 return ret;
6124 }
6125 #endif
6126 /* Some tracers won't work on kernel command line */
6127 if (system_state < SYSTEM_RUNNING && t->noboot) {
6128 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6129 t->name);
6130 return 0;
6131 }
6132
6133 /* Some tracers are only allowed for the top level buffer */
6134 if (!trace_ok_for_array(t, tr))
6135 return -EINVAL;
6136
6137 /* If trace pipe files are being read, we can't change the tracer */
6138 if (tr->trace_ref)
6139 return -EBUSY;
6140
6141 trace_branch_disable();
6142
6143 tr->current_trace->enabled--;
6144
6145 if (tr->current_trace->reset)
6146 tr->current_trace->reset(tr);
6147
6148 #ifdef CONFIG_TRACER_MAX_TRACE
6149 had_max_tr = tr->current_trace->use_max_tr;
6150
6151 /* Current trace needs to be nop_trace before synchronize_rcu */
6152 tr->current_trace = &nop_trace;
6153
6154 if (had_max_tr && !t->use_max_tr) {
6155 /*
6156 * We need to make sure that the update_max_tr sees that
6157 * current_trace changed to nop_trace to keep it from
6158 * swapping the buffers after we resize it.
6159 * The update_max_tr is called with interrupts disabled,
6160 * so a synchronize_rcu() is sufficient.
6161 */
6162 synchronize_rcu();
6163 free_snapshot(tr);
6164 tracing_disarm_snapshot(tr);
6165 }
6166
6167 if (!had_max_tr && t->use_max_tr) {
6168 ret = tracing_arm_snapshot_locked(tr);
6169 if (ret)
6170 return ret;
6171 }
6172 #else
6173 tr->current_trace = &nop_trace;
6174 #endif
6175
6176 if (t->init) {
6177 ret = tracer_init(t, tr);
6178 if (ret) {
6179 #ifdef CONFIG_TRACER_MAX_TRACE
6180 if (t->use_max_tr)
6181 tracing_disarm_snapshot(tr);
6182 #endif
6183 return ret;
6184 }
6185 }
6186
6187 tr->current_trace = t;
6188 tr->current_trace->enabled++;
6189 trace_branch_enable(tr);
6190
6191 return 0;
6192 }
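/*
 * This implements the current_tracer file. A typical interaction
 * (illustrative):
 *
 *   # cat available_tracers
 *   # echo function_graph > current_tracer
 *   # echo nop > current_tracer
 *
 * tracing_set_trace_write() below strips the written string and passes
 * it to tracing_set_tracer().
 */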
6193
6194 static ssize_t
6195 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6196 size_t cnt, loff_t *ppos)
6197 {
6198 struct trace_array *tr = filp->private_data;
6199 char buf[MAX_TRACER_SIZE+1];
6200 char *name;
6201 size_t ret;
6202 int err;
6203
6204 ret = cnt;
6205
6206 if (cnt > MAX_TRACER_SIZE)
6207 cnt = MAX_TRACER_SIZE;
6208
6209 if (copy_from_user(buf, ubuf, cnt))
6210 return -EFAULT;
6211
6212 buf[cnt] = 0;
6213
6214 name = strim(buf);
6215
6216 err = tracing_set_tracer(tr, name);
6217 if (err)
6218 return err;
6219
6220 *ppos += ret;
6221
6222 return ret;
6223 }
6224
6225 static ssize_t
6226 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6227 size_t cnt, loff_t *ppos)
6228 {
6229 char buf[64];
6230 int r;
6231
6232 r = snprintf(buf, sizeof(buf), "%ld\n",
6233 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6234 if (r > sizeof(buf))
6235 r = sizeof(buf);
6236 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6237 }
6238
6239 static ssize_t
6240 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6241 size_t cnt, loff_t *ppos)
6242 {
6243 unsigned long val;
6244 int ret;
6245
6246 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6247 if (ret)
6248 return ret;
6249
6250 *ptr = val * 1000;
6251
6252 return cnt;
6253 }
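/*
 * A quick worked example of the unit handling above: writing "500" to
 * tracing_thresh stores 500 * 1000 = 500000 ns, and reading it back
 * reports nsecs_to_usecs(500000) = 500. A stored value of
 * (unsigned long)-1 is reported as -1 rather than being converted.
 */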
6254
6255 static ssize_t
6256 tracing_thresh_read(struct file *filp, char __user *ubuf,
6257 size_t cnt, loff_t *ppos)
6258 {
6259 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6260 }
6261
6262 static ssize_t
6263 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6264 size_t cnt, loff_t *ppos)
6265 {
6266 struct trace_array *tr = filp->private_data;
6267 int ret;
6268
6269 guard(mutex)(&trace_types_lock);
6270 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6271 if (ret < 0)
6272 return ret;
6273
6274 if (tr->current_trace->update_thresh) {
6275 ret = tr->current_trace->update_thresh(tr);
6276 if (ret < 0)
6277 return ret;
6278 }
6279
6280 return cnt;
6281 }
6282
6283 #ifdef CONFIG_TRACER_MAX_TRACE
6284
6285 static ssize_t
6286 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6287 size_t cnt, loff_t *ppos)
6288 {
6289 struct trace_array *tr = filp->private_data;
6290
6291 return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6292 }
6293
6294 static ssize_t
6295 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6296 size_t cnt, loff_t *ppos)
6297 {
6298 struct trace_array *tr = filp->private_data;
6299
6300 return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6301 }
6302
6303 #endif
6304
6305 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6306 {
6307 if (cpu == RING_BUFFER_ALL_CPUS) {
6308 if (cpumask_empty(tr->pipe_cpumask)) {
6309 cpumask_setall(tr->pipe_cpumask);
6310 return 0;
6311 }
6312 } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6313 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6314 return 0;
6315 }
6316 return -EBUSY;
6317 }
6318
6319 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6320 {
6321 if (cpu == RING_BUFFER_ALL_CPUS) {
6322 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6323 cpumask_clear(tr->pipe_cpumask);
6324 } else {
6325 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6326 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6327 }
6328 }
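/*
 * The exclusion implemented by the two helpers above means, for
 * example (illustrative):
 *
 *   # cat trace_pipe &                   # claims every CPU
 *   # cat per_cpu/cpu0/trace_pipe        # now fails with -EBUSY
 *
 * and likewise an open per-cpu reader blocks a later open of the
 * global trace_pipe.
 */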
6329
6330 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6331 {
6332 struct trace_array *tr = inode->i_private;
6333 struct trace_iterator *iter;
6334 int cpu;
6335 int ret;
6336
6337 ret = tracing_check_open_get_tr(tr);
6338 if (ret)
6339 return ret;
6340
6341 mutex_lock(&trace_types_lock);
6342 cpu = tracing_get_cpu(inode);
6343 ret = open_pipe_on_cpu(tr, cpu);
6344 if (ret)
6345 goto fail_pipe_on_cpu;
6346
6347 /* create a buffer to store the information to pass to userspace */
6348 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6349 if (!iter) {
6350 ret = -ENOMEM;
6351 goto fail_alloc_iter;
6352 }
6353
6354 trace_seq_init(&iter->seq);
6355 iter->trace = tr->current_trace;
6356
6357 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6358 ret = -ENOMEM;
6359 goto fail;
6360 }
6361
6362 /* trace pipe does not show start of buffer */
6363 cpumask_setall(iter->started);
6364
6365 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6366 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6367
6368 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6369 if (trace_clocks[tr->clock_id].in_ns)
6370 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6371
6372 iter->tr = tr;
6373 iter->array_buffer = &tr->array_buffer;
6374 iter->cpu_file = cpu;
6375 mutex_init(&iter->mutex);
6376 filp->private_data = iter;
6377
6378 if (iter->trace->pipe_open)
6379 iter->trace->pipe_open(iter);
6380
6381 nonseekable_open(inode, filp);
6382
6383 tr->trace_ref++;
6384
6385 mutex_unlock(&trace_types_lock);
6386 return ret;
6387
6388 fail:
6389 kfree(iter);
6390 fail_alloc_iter:
6391 close_pipe_on_cpu(tr, cpu);
6392 fail_pipe_on_cpu:
6393 __trace_array_put(tr);
6394 mutex_unlock(&trace_types_lock);
6395 return ret;
6396 }
6397
6398 static int tracing_release_pipe(struct inode *inode, struct file *file)
6399 {
6400 struct trace_iterator *iter = file->private_data;
6401 struct trace_array *tr = inode->i_private;
6402
6403 mutex_lock(&trace_types_lock);
6404
6405 tr->trace_ref--;
6406
6407 if (iter->trace->pipe_close)
6408 iter->trace->pipe_close(iter);
6409 close_pipe_on_cpu(tr, iter->cpu_file);
6410 mutex_unlock(&trace_types_lock);
6411
6412 free_trace_iter_content(iter);
6413 kfree(iter);
6414
6415 trace_array_put(tr);
6416
6417 return 0;
6418 }
6419
6420 static __poll_t
6421 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6422 {
6423 struct trace_array *tr = iter->tr;
6424
6425 /* Iterators are static, they should be filled or empty */
6426 if (trace_buffer_iter(iter, iter->cpu_file))
6427 return EPOLLIN | EPOLLRDNORM;
6428
6429 if (tr->trace_flags & TRACE_ITER_BLOCK)
6430 /*
6431 * Always select as readable when in blocking mode
6432 */
6433 return EPOLLIN | EPOLLRDNORM;
6434 else
6435 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6436 filp, poll_table, iter->tr->buffer_percent);
6437 }
6438
6439 static __poll_t
6440 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6441 {
6442 struct trace_iterator *iter = filp->private_data;
6443
6444 return trace_poll(iter, filp, poll_table);
6445 }
6446
6447 /* Must be called with iter->mutex held. */
6448 static int tracing_wait_pipe(struct file *filp)
6449 {
6450 struct trace_iterator *iter = filp->private_data;
6451 int ret;
6452
6453 while (trace_empty(iter)) {
6454
6455 if ((filp->f_flags & O_NONBLOCK)) {
6456 return -EAGAIN;
6457 }
6458
6459 /*
6460 * We block until we read something and tracing is disabled.
6461 * We still block if tracing is disabled, but we have never
6462 * read anything. This allows a user to cat this file, and
6463 * then enable tracing. But after we have read something,
6464 * we give an EOF when tracing is again disabled.
6465 *
6466 * iter->pos will be 0 if we haven't read anything.
6467 */
6468 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6469 break;
6470
6471 mutex_unlock(&iter->mutex);
6472
6473 ret = wait_on_pipe(iter, 0);
6474
6475 mutex_lock(&iter->mutex);
6476
6477 if (ret)
6478 return ret;
6479 }
6480
6481 return 1;
6482 }
6483
6484 /*
6485 * Consumer reader.
6486 */
6487 static ssize_t
6488 tracing_read_pipe(struct file *filp, char __user *ubuf,
6489 size_t cnt, loff_t *ppos)
6490 {
6491 struct trace_iterator *iter = filp->private_data;
6492 ssize_t sret;
6493
6494 /*
6495 * Avoid more than one consumer on a single file descriptor.
6496 * This is just a matter of trace coherency; the ring buffer itself
6497 * is protected.
6498 */
6499 guard(mutex)(&iter->mutex);
6500
6501 /* return any leftover data */
6502 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6503 if (sret != -EBUSY)
6504 return sret;
6505
6506 trace_seq_init(&iter->seq);
6507
6508 if (iter->trace->read) {
6509 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6510 if (sret)
6511 return sret;
6512 }
6513
6514 waitagain:
6515 sret = tracing_wait_pipe(filp);
6516 if (sret <= 0)
6517 return sret;
6518
6519 /* stop when tracing is finished */
6520 if (trace_empty(iter))
6521 return 0;
6522
6523 if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6524 cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6525
6526 /* reset all but tr, trace, and overruns */
6527 trace_iterator_reset(iter);
6528 cpumask_clear(iter->started);
6529 trace_seq_init(&iter->seq);
6530
6531 trace_event_read_lock();
6532 trace_access_lock(iter->cpu_file);
6533 while (trace_find_next_entry_inc(iter) != NULL) {
6534 enum print_line_t ret;
6535 int save_len = iter->seq.seq.len;
6536
6537 ret = print_trace_line(iter);
6538 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6539 /*
6540 * If one print_trace_line() fills the entire trace_seq in one shot,
6541 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6542 * In this case, we need to consume it, otherwise the loop will peek at
6543 * this event next time, resulting in an infinite loop.
6544 */
6545 if (save_len == 0) {
6546 iter->seq.full = 0;
6547 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6548 trace_consume(iter);
6549 break;
6550 }
6551
6552 /* In other cases, don't print partial lines */
6553 iter->seq.seq.len = save_len;
6554 break;
6555 }
6556 if (ret != TRACE_TYPE_NO_CONSUME)
6557 trace_consume(iter);
6558
6559 if (trace_seq_used(&iter->seq) >= cnt)
6560 break;
6561
6562 /*
6563 * Setting the full flag means we reached the trace_seq buffer
6564 * size and we should leave by partial output condition above.
6565 * One of the trace_seq_* functions is not used properly.
6566 */
6567 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6568 iter->ent->type);
6569 }
6570 trace_access_unlock(iter->cpu_file);
6571 trace_event_read_unlock();
6572
6573 /* Now copy what we have to the user */
6574 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6575 if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6576 trace_seq_init(&iter->seq);
6577
6578 /*
6579 * If there was nothing to send to user, in spite of consuming trace
6580 * entries, go back to wait for more entries.
6581 */
6582 if (sret == -EBUSY)
6583 goto waitagain;
6584
6585 return sret;
6586 }
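/*
 * Note that trace_pipe is a consuming read: once a line has been
 * copied to user space it is gone from the ring buffer. For example
 * (illustrative):
 *
 *   # cat trace_pipe > /tmp/out &        # drains events as they arrive
 *   # cat trace                          # drained events no longer appear
 *
 * With O_NONBLOCK the read returns -EAGAIN instead of sleeping in
 * tracing_wait_pipe().
 */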
6587
6588 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6589 unsigned int idx)
6590 {
6591 __free_page(spd->pages[idx]);
6592 }
6593
6594 static size_t
6595 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6596 {
6597 size_t count;
6598 int save_len;
6599 int ret;
6600
6601 /* Seq buffer is page-sized, exactly what we need. */
6602 for (;;) {
6603 save_len = iter->seq.seq.len;
6604 ret = print_trace_line(iter);
6605
6606 if (trace_seq_has_overflowed(&iter->seq)) {
6607 iter->seq.seq.len = save_len;
6608 break;
6609 }
6610
6611 /*
6612 * This should not be hit, because it should only
6613 * be set if the iter->seq overflowed. But check it
6614 * anyway to be safe.
6615 */
6616 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6617 iter->seq.seq.len = save_len;
6618 break;
6619 }
6620
6621 count = trace_seq_used(&iter->seq) - save_len;
6622 if (rem < count) {
6623 rem = 0;
6624 iter->seq.seq.len = save_len;
6625 break;
6626 }
6627
6628 if (ret != TRACE_TYPE_NO_CONSUME)
6629 trace_consume(iter);
6630 rem -= count;
6631 if (!trace_find_next_entry_inc(iter)) {
6632 rem = 0;
6633 iter->ent = NULL;
6634 break;
6635 }
6636 }
6637
6638 return rem;
6639 }
6640
6641 static ssize_t tracing_splice_read_pipe(struct file *filp,
6642 loff_t *ppos,
6643 struct pipe_inode_info *pipe,
6644 size_t len,
6645 unsigned int flags)
6646 {
6647 struct page *pages_def[PIPE_DEF_BUFFERS];
6648 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6649 struct trace_iterator *iter = filp->private_data;
6650 struct splice_pipe_desc spd = {
6651 .pages = pages_def,
6652 .partial = partial_def,
6653 .nr_pages = 0, /* This gets updated below. */
6654 .nr_pages_max = PIPE_DEF_BUFFERS,
6655 .ops = &default_pipe_buf_ops,
6656 .spd_release = tracing_spd_release_pipe,
6657 };
6658 ssize_t ret;
6659 size_t rem;
6660 unsigned int i;
6661
6662 if (splice_grow_spd(pipe, &spd))
6663 return -ENOMEM;
6664
6665 mutex_lock(&iter->mutex);
6666
6667 if (iter->trace->splice_read) {
6668 ret = iter->trace->splice_read(iter, filp,
6669 ppos, pipe, len, flags);
6670 if (ret)
6671 goto out_err;
6672 }
6673
6674 ret = tracing_wait_pipe(filp);
6675 if (ret <= 0)
6676 goto out_err;
6677
6678 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6679 ret = -EFAULT;
6680 goto out_err;
6681 }
6682
6683 trace_event_read_lock();
6684 trace_access_lock(iter->cpu_file);
6685
6686 /* Fill as many pages as possible. */
6687 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6688 spd.pages[i] = alloc_page(GFP_KERNEL);
6689 if (!spd.pages[i])
6690 break;
6691
6692 rem = tracing_fill_pipe_page(rem, iter);
6693
6694 /* Copy the data into the page, so we can start over. */
6695 ret = trace_seq_to_buffer(&iter->seq,
6696 page_address(spd.pages[i]),
6697 min((size_t)trace_seq_used(&iter->seq),
6698 (size_t)PAGE_SIZE));
6699 if (ret < 0) {
6700 __free_page(spd.pages[i]);
6701 break;
6702 }
6703 spd.partial[i].offset = 0;
6704 spd.partial[i].len = ret;
6705
6706 trace_seq_init(&iter->seq);
6707 }
6708
6709 trace_access_unlock(iter->cpu_file);
6710 trace_event_read_unlock();
6711 mutex_unlock(&iter->mutex);
6712
6713 spd.nr_pages = i;
6714
6715 if (i)
6716 ret = splice_to_pipe(pipe, &spd);
6717 else
6718 ret = 0;
6719 out:
6720 splice_shrink_spd(&spd);
6721 return ret;
6722
6723 out_err:
6724 mutex_unlock(&iter->mutex);
6725 goto out;
6726 }
6727
6728 static ssize_t
6729 tracing_entries_read(struct file *filp, char __user *ubuf,
6730 size_t cnt, loff_t *ppos)
6731 {
6732 struct inode *inode = file_inode(filp);
6733 struct trace_array *tr = inode->i_private;
6734 int cpu = tracing_get_cpu(inode);
6735 char buf[64];
6736 int r = 0;
6737 ssize_t ret;
6738
6739 mutex_lock(&trace_types_lock);
6740
6741 if (cpu == RING_BUFFER_ALL_CPUS) {
6742 int cpu, buf_size_same;
6743 unsigned long size;
6744
6745 size = 0;
6746 buf_size_same = 1;
6747 /* check if all cpu sizes are same */
6748 for_each_tracing_cpu(cpu) {
6749 /* fill in the size from first enabled cpu */
6750 if (size == 0)
6751 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6752 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6753 buf_size_same = 0;
6754 break;
6755 }
6756 }
6757
6758 if (buf_size_same) {
6759 if (!tr->ring_buffer_expanded)
6760 r = sprintf(buf, "%lu (expanded: %lu)\n",
6761 size >> 10,
6762 trace_buf_size >> 10);
6763 else
6764 r = sprintf(buf, "%lu\n", size >> 10);
6765 } else
6766 r = sprintf(buf, "X\n");
6767 } else
6768 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6769
6770 mutex_unlock(&trace_types_lock);
6771
6772 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6773 return ret;
6774 }
6775
6776 static ssize_t
6777 tracing_entries_write(struct file *filp, const char __user *ubuf,
6778 size_t cnt, loff_t *ppos)
6779 {
6780 struct inode *inode = file_inode(filp);
6781 struct trace_array *tr = inode->i_private;
6782 unsigned long val;
6783 int ret;
6784
6785 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6786 if (ret)
6787 return ret;
6788
6789 /* must have at least 1 entry */
6790 if (!val)
6791 return -EINVAL;
6792
6793 /* value is in KB */
6794 val <<= 10;
6795 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6796 if (ret < 0)
6797 return ret;
6798
6799 *ppos += cnt;
6800
6801 return cnt;
6802 }
6803
6804 static ssize_t
6805 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6806 size_t cnt, loff_t *ppos)
6807 {
6808 struct trace_array *tr = filp->private_data;
6809 char buf[64];
6810 int r, cpu;
6811 unsigned long size = 0, expanded_size = 0;
6812
6813 mutex_lock(&trace_types_lock);
6814 for_each_tracing_cpu(cpu) {
6815 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6816 if (!tr->ring_buffer_expanded)
6817 expanded_size += trace_buf_size >> 10;
6818 }
6819 if (tr->ring_buffer_expanded)
6820 r = sprintf(buf, "%lu\n", size);
6821 else
6822 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6823 mutex_unlock(&trace_types_lock);
6824
6825 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6826 }
6827
6828 static ssize_t
6829 tracing_last_boot_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
6830 {
6831 struct trace_array *tr = filp->private_data;
6832 struct seq_buf seq;
6833 char buf[64];
6834
6835 seq_buf_init(&seq, buf, 64);
6836
6837 seq_buf_printf(&seq, "text delta:\t%ld\n", tr->text_delta);
6838 seq_buf_printf(&seq, "data delta:\t%ld\n", tr->data_delta);
6839
6840 return simple_read_from_buffer(ubuf, cnt, ppos, buf, seq_buf_used(&seq));
6841 }
6842
6843 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
6844 {
6845 struct trace_array *tr = inode->i_private;
6846 int cpu = tracing_get_cpu(inode);
6847 int ret;
6848
6849 ret = tracing_check_open_get_tr(tr);
6850 if (ret)
6851 return ret;
6852
6853 ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
6854 if (ret < 0)
6855 __trace_array_put(tr);
6856 return ret;
6857 }
6858
6859 static ssize_t
6860 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6861 size_t cnt, loff_t *ppos)
6862 {
6863 /*
6864 * There is no need to read what the user has written; this function
6865 * is just to make sure that there is no error when "echo" is used.
6866 */
6867
6868 *ppos += cnt;
6869
6870 return cnt;
6871 }
6872
6873 static int
6874 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6875 {
6876 struct trace_array *tr = inode->i_private;
6877
6878 /* disable tracing ? */
6879 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6880 tracer_tracing_off(tr);
6881 /* resize the ring buffer to 0 */
6882 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6883
6884 trace_array_put(tr);
6885
6886 return 0;
6887 }
6888
6889 #define TRACE_MARKER_MAX_SIZE 4096
6890
6891 static ssize_t
6892 tracing_mark_write(struct file *filp, const char __user *ubuf,
6893 size_t cnt, loff_t *fpos)
6894 {
6895 struct trace_array *tr = filp->private_data;
6896 struct ring_buffer_event *event;
6897 enum event_trigger_type tt = ETT_NONE;
6898 struct trace_buffer *buffer;
6899 struct print_entry *entry;
6900 int meta_size;
6901 ssize_t written;
6902 size_t size;
6903 int len;
6904
6905 /* Used in tracing_mark_raw_write() as well */
6906 #define FAULTED_STR "<faulted>"
6907 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6908
6909 if (tracing_disabled)
6910 return -EINVAL;
6911
6912 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6913 return -EINVAL;
6914
6915 if ((ssize_t)cnt < 0)
6916 return -EINVAL;
6917
6918 if (cnt > TRACE_MARKER_MAX_SIZE)
6919 cnt = TRACE_MARKER_MAX_SIZE;
6920
6921 meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */
6922 again:
6923 size = cnt + meta_size;
6924
6925 /* If less than "<faulted>", then make sure we can still add that */
6926 if (cnt < FAULTED_SIZE)
6927 size += FAULTED_SIZE - cnt;
6928
6929 buffer = tr->array_buffer.buffer;
6930 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6931 tracing_gen_ctx());
6932 if (unlikely(!event)) {
6933 /*
6934 * If the size was greater than what was allowed, then
6935 * make it smaller and try again.
6936 */
6937 if (size > ring_buffer_max_event_size(buffer)) {
6938 /* A cnt smaller than FAULTED_SIZE should never make size bigger than the max */
6939 if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
6940 return -EBADF;
6941 cnt = ring_buffer_max_event_size(buffer) - meta_size;
6942 /* The above should only happen once */
6943 if (WARN_ON_ONCE(cnt + meta_size == size))
6944 return -EBADF;
6945 goto again;
6946 }
6947
6948 /* Ring buffer disabled, return as if not open for write */
6949 return -EBADF;
6950 }
6951
6952 entry = ring_buffer_event_data(event);
6953 entry->ip = _THIS_IP_;
6954
6955 len = copy_from_user_nofault(&entry->buf, ubuf, cnt);
6956 if (len) {
6957 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6958 cnt = FAULTED_SIZE;
6959 written = -EFAULT;
6960 } else
6961 written = cnt;
6962
6963 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6964 /* do not add \n before testing triggers, but add \0 */
6965 entry->buf[cnt] = '\0';
6966 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
6967 }
6968
6969 if (entry->buf[cnt - 1] != '\n') {
6970 entry->buf[cnt] = '\n';
6971 entry->buf[cnt + 1] = '\0';
6972 } else
6973 entry->buf[cnt] = '\0';
6974
6975 if (static_branch_unlikely(&trace_marker_exports_enabled))
6976 ftrace_exports(event, TRACE_EXPORT_MARKER);
6977 __buffer_unlock_commit(buffer, event);
6978
6979 if (tt)
6980 event_triggers_post_call(tr->trace_marker_file, tt);
6981
6982 return written;
6983 }
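/*
 * User-space view of the above (illustrative): any write to
 * trace_marker shows up as a print event in the trace, e.g.
 *
 *   # echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *   # grep tracing_mark_write trace
 *
 * Writes larger than TRACE_MARKER_MAX_SIZE are truncated, and a
 * faulting user buffer is recorded as "<faulted>".
 */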
6984
6985 static ssize_t
6986 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6987 size_t cnt, loff_t *fpos)
6988 {
6989 struct trace_array *tr = filp->private_data;
6990 struct ring_buffer_event *event;
6991 struct trace_buffer *buffer;
6992 struct raw_data_entry *entry;
6993 ssize_t written;
6994 int size;
6995 int len;
6996
6997 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6998
6999 if (tracing_disabled)
7000 return -EINVAL;
7001
7002 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7003 return -EINVAL;
7004
7005 /* The marker must at least have a tag id */
7006 if (cnt < sizeof(unsigned int))
7007 return -EINVAL;
7008
7009 size = sizeof(*entry) + cnt;
7010 if (cnt < FAULT_SIZE_ID)
7011 size += FAULT_SIZE_ID - cnt;
7012
7013 buffer = tr->array_buffer.buffer;
7014
7015 if (size > ring_buffer_max_event_size(buffer))
7016 return -EINVAL;
7017
7018 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7019 tracing_gen_ctx());
7020 if (!event)
7021 /* Ring buffer disabled, return as if not open for write */
7022 return -EBADF;
7023
7024 entry = ring_buffer_event_data(event);
7025
7026 len = copy_from_user_nofault(&entry->id, ubuf, cnt);
7027 if (len) {
7028 entry->id = -1;
7029 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7030 written = -EFAULT;
7031 } else
7032 written = cnt;
7033
7034 __buffer_unlock_commit(buffer, event);
7035
7036 return written;
7037 }
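/*
 * Unlike trace_marker, trace_marker_raw expects binary data that
 * starts with a 4-byte tag id. A minimal, hypothetical user-space
 * writer might look like:
 *
 *   struct { unsigned int id; char payload[8]; } rec = { 42, "payload" };
 *   int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *   write(fd, &rec, sizeof(rec));
 *
 * Whoever reads the raw events is expected to know how to decode the
 * payload for a given id.
 */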
7038
7039 static int tracing_clock_show(struct seq_file *m, void *v)
7040 {
7041 struct trace_array *tr = m->private;
7042 int i;
7043
7044 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7045 seq_printf(m,
7046 "%s%s%s%s", i ? " " : "",
7047 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7048 i == tr->clock_id ? "]" : "");
7049 seq_putc(m, '\n');
7050
7051 return 0;
7052 }
7053
7054 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7055 {
7056 int i;
7057
7058 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7059 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7060 break;
7061 }
7062 if (i == ARRAY_SIZE(trace_clocks))
7063 return -EINVAL;
7064
7065 mutex_lock(&trace_types_lock);
7066
7067 tr->clock_id = i;
7068
7069 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7070
7071 /*
7072 * New clock may not be consistent with the previous clock.
7073 * Reset the buffer so that it doesn't have incomparable timestamps.
7074 */
7075 tracing_reset_online_cpus(&tr->array_buffer);
7076
7077 #ifdef CONFIG_TRACER_MAX_TRACE
7078 if (tr->max_buffer.buffer)
7079 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7080 tracing_reset_online_cpus(&tr->max_buffer);
7081 #endif
7082
7083 mutex_unlock(&trace_types_lock);
7084
7085 return 0;
7086 }
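/*
 * The corresponding trace_clock file interaction (illustrative):
 *
 *   # cat trace_clock
 *   [local] global counter uptime perf ...
 *   # echo global > trace_clock
 *
 * As done above, switching clocks resets the buffers so that events
 * with incomparable timestamps are not mixed.
 */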
7087
7088 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7089 size_t cnt, loff_t *fpos)
7090 {
7091 struct seq_file *m = filp->private_data;
7092 struct trace_array *tr = m->private;
7093 char buf[64];
7094 const char *clockstr;
7095 int ret;
7096
7097 if (cnt >= sizeof(buf))
7098 return -EINVAL;
7099
7100 if (copy_from_user(buf, ubuf, cnt))
7101 return -EFAULT;
7102
7103 buf[cnt] = 0;
7104
7105 clockstr = strstrip(buf);
7106
7107 ret = tracing_set_clock(tr, clockstr);
7108 if (ret)
7109 return ret;
7110
7111 *fpos += cnt;
7112
7113 return cnt;
7114 }
7115
7116 static int tracing_clock_open(struct inode *inode, struct file *file)
7117 {
7118 struct trace_array *tr = inode->i_private;
7119 int ret;
7120
7121 ret = tracing_check_open_get_tr(tr);
7122 if (ret)
7123 return ret;
7124
7125 ret = single_open(file, tracing_clock_show, inode->i_private);
7126 if (ret < 0)
7127 trace_array_put(tr);
7128
7129 return ret;
7130 }
7131
7132 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7133 {
7134 struct trace_array *tr = m->private;
7135
7136 mutex_lock(&trace_types_lock);
7137
7138 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7139 seq_puts(m, "delta [absolute]\n");
7140 else
7141 seq_puts(m, "[delta] absolute\n");
7142
7143 mutex_unlock(&trace_types_lock);
7144
7145 return 0;
7146 }
7147
7148 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7149 {
7150 struct trace_array *tr = inode->i_private;
7151 int ret;
7152
7153 ret = tracing_check_open_get_tr(tr);
7154 if (ret)
7155 return ret;
7156
7157 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7158 if (ret < 0)
7159 trace_array_put(tr);
7160
7161 return ret;
7162 }
7163
7164 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7165 {
7166 if (rbe == this_cpu_read(trace_buffered_event))
7167 return ring_buffer_time_stamp(buffer);
7168
7169 return ring_buffer_event_time_stamp(buffer, rbe);
7170 }
7171
7172 /*
7173 * Set or disable using the per CPU trace_buffer_event when possible.
7174 */
7175 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7176 {
7177 guard(mutex)(&trace_types_lock);
7178
7179 if (set && tr->no_filter_buffering_ref++)
7180 return 0;
7181
7182 if (!set) {
7183 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7184 return -EINVAL;
7185
7186 --tr->no_filter_buffering_ref;
7187 }
7188
7189 return 0;
7190 }
7191
7192 struct ftrace_buffer_info {
7193 struct trace_iterator iter;
7194 void *spare;
7195 unsigned int spare_cpu;
7196 unsigned int spare_size;
7197 unsigned int read;
7198 };
7199
7200 #ifdef CONFIG_TRACER_SNAPSHOT
7201 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7202 {
7203 struct trace_array *tr = inode->i_private;
7204 struct trace_iterator *iter;
7205 struct seq_file *m;
7206 int ret;
7207
7208 ret = tracing_check_open_get_tr(tr);
7209 if (ret)
7210 return ret;
7211
7212 if (file->f_mode & FMODE_READ) {
7213 iter = __tracing_open(inode, file, true);
7214 if (IS_ERR(iter))
7215 ret = PTR_ERR(iter);
7216 } else {
7217 /* Writes still need the seq_file to hold the private data */
7218 ret = -ENOMEM;
7219 m = kzalloc(sizeof(*m), GFP_KERNEL);
7220 if (!m)
7221 goto out;
7222 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7223 if (!iter) {
7224 kfree(m);
7225 goto out;
7226 }
7227 ret = 0;
7228
7229 iter->tr = tr;
7230 iter->array_buffer = &tr->max_buffer;
7231 iter->cpu_file = tracing_get_cpu(inode);
7232 m->private = iter;
7233 file->private_data = m;
7234 }
7235 out:
7236 if (ret < 0)
7237 trace_array_put(tr);
7238
7239 return ret;
7240 }
7241
7242 static void tracing_swap_cpu_buffer(void *tr)
7243 {
7244 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7245 }
7246
7247 static ssize_t
7248 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7249 loff_t *ppos)
7250 {
7251 struct seq_file *m = filp->private_data;
7252 struct trace_iterator *iter = m->private;
7253 struct trace_array *tr = iter->tr;
7254 unsigned long val;
7255 int ret;
7256
7257 ret = tracing_update_buffers(tr);
7258 if (ret < 0)
7259 return ret;
7260
7261 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7262 if (ret)
7263 return ret;
7264
7265 guard(mutex)(&trace_types_lock);
7266
7267 if (tr->current_trace->use_max_tr)
7268 return -EBUSY;
7269
7270 local_irq_disable();
7271 arch_spin_lock(&tr->max_lock);
7272 if (tr->cond_snapshot)
7273 ret = -EBUSY;
7274 arch_spin_unlock(&tr->max_lock);
7275 local_irq_enable();
7276 if (ret)
7277 return ret;
7278
7279 switch (val) {
7280 case 0:
7281 if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7282 return -EINVAL;
7283 if (tr->allocated_snapshot)
7284 free_snapshot(tr);
7285 break;
7286 case 1:
7287 /* Only allow per-cpu swap if the ring buffer supports it */
7288 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7289 if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7290 return -EINVAL;
7291 #endif
7292 if (tr->allocated_snapshot)
7293 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7294 &tr->array_buffer, iter->cpu_file);
7295
7296 ret = tracing_arm_snapshot_locked(tr);
7297 if (ret)
7298 return ret;
7299
7300 /* Now, we're going to swap */
7301 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7302 local_irq_disable();
7303 update_max_tr(tr, current, smp_processor_id(), NULL);
7304 local_irq_enable();
7305 } else {
7306 smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7307 (void *)tr, 1);
7308 }
7309 tracing_disarm_snapshot(tr);
7310 break;
7311 default:
7312 if (tr->allocated_snapshot) {
7313 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7314 tracing_reset_online_cpus(&tr->max_buffer);
7315 else
7316 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7317 }
7318 break;
7319 }
7320
7321 if (ret >= 0) {
7322 *ppos += cnt;
7323 ret = cnt;
7324 }
7325
7326 return ret;
7327 }
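
/*
 * Illustrative summary of the values accepted by the write above (assumes a
 * mounted tracefs; mirrors the switch statement in tracing_snapshot_write()):
 *
 *   # echo 0 > snapshot   - free the snapshot buffer (all-CPU file only)
 *   # echo 1 > snapshot   - allocate the buffer if needed and take a snapshot
 *   # echo 2 > snapshot   - (any other value) clear the snapshot contents
 *                           without freeing the buffer
 */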
7328
7329 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7330 {
7331 struct seq_file *m = file->private_data;
7332 int ret;
7333
7334 ret = tracing_release(inode, file);
7335
7336 if (file->f_mode & FMODE_READ)
7337 return ret;
7338
7339 /* If write only, the seq_file is just a stub */
7340 if (m)
7341 kfree(m->private);
7342 kfree(m);
7343
7344 return 0;
7345 }
7346
7347 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7348 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7349 size_t count, loff_t *ppos);
7350 static int tracing_buffers_release(struct inode *inode, struct file *file);
7351 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7352 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7353
7354 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7355 {
7356 struct ftrace_buffer_info *info;
7357 int ret;
7358
7359 /* The following checks for tracefs lockdown */
7360 ret = tracing_buffers_open(inode, filp);
7361 if (ret < 0)
7362 return ret;
7363
7364 info = filp->private_data;
7365
7366 if (info->iter.trace->use_max_tr) {
7367 tracing_buffers_release(inode, filp);
7368 return -EBUSY;
7369 }
7370
7371 info->iter.snapshot = true;
7372 info->iter.array_buffer = &info->iter.tr->max_buffer;
7373
7374 return ret;
7375 }
7376
7377 #endif /* CONFIG_TRACER_SNAPSHOT */
7378
7379
7380 static const struct file_operations tracing_thresh_fops = {
7381 .open = tracing_open_generic,
7382 .read = tracing_thresh_read,
7383 .write = tracing_thresh_write,
7384 .llseek = generic_file_llseek,
7385 };
7386
7387 #ifdef CONFIG_TRACER_MAX_TRACE
7388 static const struct file_operations tracing_max_lat_fops = {
7389 .open = tracing_open_generic_tr,
7390 .read = tracing_max_lat_read,
7391 .write = tracing_max_lat_write,
7392 .llseek = generic_file_llseek,
7393 .release = tracing_release_generic_tr,
7394 };
7395 #endif
7396
7397 static const struct file_operations set_tracer_fops = {
7398 .open = tracing_open_generic_tr,
7399 .read = tracing_set_trace_read,
7400 .write = tracing_set_trace_write,
7401 .llseek = generic_file_llseek,
7402 .release = tracing_release_generic_tr,
7403 };
7404
7405 static const struct file_operations tracing_pipe_fops = {
7406 .open = tracing_open_pipe,
7407 .poll = tracing_poll_pipe,
7408 .read = tracing_read_pipe,
7409 .splice_read = tracing_splice_read_pipe,
7410 .release = tracing_release_pipe,
7411 };
7412
7413 static const struct file_operations tracing_entries_fops = {
7414 .open = tracing_open_generic_tr,
7415 .read = tracing_entries_read,
7416 .write = tracing_entries_write,
7417 .llseek = generic_file_llseek,
7418 .release = tracing_release_generic_tr,
7419 };
7420
7421 static const struct file_operations tracing_buffer_meta_fops = {
7422 .open = tracing_buffer_meta_open,
7423 .read = seq_read,
7424 .llseek = seq_lseek,
7425 .release = tracing_seq_release,
7426 };
7427
7428 static const struct file_operations tracing_total_entries_fops = {
7429 .open = tracing_open_generic_tr,
7430 .read = tracing_total_entries_read,
7431 .llseek = generic_file_llseek,
7432 .release = tracing_release_generic_tr,
7433 };
7434
7435 static const struct file_operations tracing_free_buffer_fops = {
7436 .open = tracing_open_generic_tr,
7437 .write = tracing_free_buffer_write,
7438 .release = tracing_free_buffer_release,
7439 };
7440
7441 static const struct file_operations tracing_mark_fops = {
7442 .open = tracing_mark_open,
7443 .write = tracing_mark_write,
7444 .release = tracing_release_generic_tr,
7445 };
7446
7447 static const struct file_operations tracing_mark_raw_fops = {
7448 .open = tracing_mark_open,
7449 .write = tracing_mark_raw_write,
7450 .release = tracing_release_generic_tr,
7451 };
7452
7453 static const struct file_operations trace_clock_fops = {
7454 .open = tracing_clock_open,
7455 .read = seq_read,
7456 .llseek = seq_lseek,
7457 .release = tracing_single_release_tr,
7458 .write = tracing_clock_write,
7459 };
7460
7461 static const struct file_operations trace_time_stamp_mode_fops = {
7462 .open = tracing_time_stamp_mode_open,
7463 .read = seq_read,
7464 .llseek = seq_lseek,
7465 .release = tracing_single_release_tr,
7466 };
7467
7468 static const struct file_operations last_boot_fops = {
7469 .open = tracing_open_generic_tr,
7470 .read = tracing_last_boot_read,
7471 .llseek = generic_file_llseek,
7472 .release = tracing_release_generic_tr,
7473 };
7474
7475 #ifdef CONFIG_TRACER_SNAPSHOT
7476 static const struct file_operations snapshot_fops = {
7477 .open = tracing_snapshot_open,
7478 .read = seq_read,
7479 .write = tracing_snapshot_write,
7480 .llseek = tracing_lseek,
7481 .release = tracing_snapshot_release,
7482 };
7483
7484 static const struct file_operations snapshot_raw_fops = {
7485 .open = snapshot_raw_open,
7486 .read = tracing_buffers_read,
7487 .release = tracing_buffers_release,
7488 .splice_read = tracing_buffers_splice_read,
7489 };
7490
7491 #endif /* CONFIG_TRACER_SNAPSHOT */
7492
7493 /*
7494 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7495 * @filp: The active open file structure
7496 * @ubuf: The userspace provided buffer containing the value to write
7497 * @cnt: The number of bytes to write
7498 * @ppos: The current "file" position
7499 *
7500 * This function implements the write interface for a struct trace_min_max_param.
7501 * The filp->private_data must point to a trace_min_max_param structure that
7502 * defines where to write the value, the min and the max acceptable values,
7503 * and a lock to protect the write.
7504 */
7505 static ssize_t
7506 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7507 {
7508 struct trace_min_max_param *param = filp->private_data;
7509 u64 val;
7510 int err;
7511
7512 if (!param)
7513 return -EFAULT;
7514
7515 err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7516 if (err)
7517 return err;
7518
7519 if (param->lock)
7520 mutex_lock(param->lock);
7521
7522 if (param->min && val < *param->min)
7523 err = -EINVAL;
7524
7525 if (param->max && val > *param->max)
7526 err = -EINVAL;
7527
7528 if (!err)
7529 *param->val = val;
7530
7531 if (param->lock)
7532 mutex_unlock(param->lock);
7533
7534 if (err)
7535 return err;
7536
7537 return cnt;
7538 }
7539
7540 /*
7541 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7542 * @filp: The active open file structure
7543 * @ubuf: The userspace provided buffer to read value into
7544 * @cnt: The maximum number of bytes to read
7545 * @ppos: The current "file" position
7546 *
7547 * This function implements the read interface for a struct trace_min_max_param.
7548 * The filp->private_data must point to a trace_min_max_param struct with valid
7549 * data.
7550 */
7551 static ssize_t
7552 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7553 {
7554 struct trace_min_max_param *param = filp->private_data;
7555 char buf[U64_STR_SIZE];
7556 int len;
7557 u64 val;
7558
7559 if (!param)
7560 return -EFAULT;
7561
7562 val = *param->val;
7563
7564 if (cnt > sizeof(buf))
7565 cnt = sizeof(buf);
7566
7567 len = snprintf(buf, sizeof(buf), "%llu\n", val);
7568
7569 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7570 }
7571
7572 const struct file_operations trace_min_max_fops = {
7573 .open = tracing_open_generic,
7574 .read = trace_min_max_read,
7575 .write = trace_min_max_write,
7576 };
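
/*
 * Minimal sketch of how a caller might wire up trace_min_max_fops. The
 * names below ("my_*", "my_knob", parent) are hypothetical and only for
 * illustration:
 *
 *   static u64 my_val;
 *   static u64 my_min = 1, my_max = 100;
 *   static DEFINE_MUTEX(my_lock);
 *
 *   static struct trace_min_max_param my_param = {
 *           .lock = &my_lock,
 *           .val  = &my_val,
 *           .min  = &my_min,
 *           .max  = &my_max,
 *   };
 *
 *   trace_create_file("my_knob", TRACE_MODE_WRITE, parent, &my_param,
 *                     &trace_min_max_fops);
 */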
7577
7578 #define TRACING_LOG_ERRS_MAX 8
7579 #define TRACING_LOG_LOC_MAX 128
7580
7581 #define CMD_PREFIX " Command: "
7582
7583 struct err_info {
7584 const char **errs; /* ptr to loc-specific array of err strings */
7585 u8 type; /* index into errs -> specific err string */
7586 u16 pos; /* caret position */
7587 u64 ts;
7588 };
7589
7590 struct tracing_log_err {
7591 struct list_head list;
7592 struct err_info info;
7593 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7594 char *cmd; /* what caused err */
7595 };
7596
7597 static DEFINE_MUTEX(tracing_err_log_lock);
7598
7599 static struct tracing_log_err *alloc_tracing_log_err(int len)
7600 {
7601 struct tracing_log_err *err;
7602
7603 err = kzalloc(sizeof(*err), GFP_KERNEL);
7604 if (!err)
7605 return ERR_PTR(-ENOMEM);
7606
7607 err->cmd = kzalloc(len, GFP_KERNEL);
7608 if (!err->cmd) {
7609 kfree(err);
7610 return ERR_PTR(-ENOMEM);
7611 }
7612
7613 return err;
7614 }
7615
7616 static void free_tracing_log_err(struct tracing_log_err *err)
7617 {
7618 kfree(err->cmd);
7619 kfree(err);
7620 }
7621
7622 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7623 int len)
7624 {
7625 struct tracing_log_err *err;
7626 char *cmd;
7627
7628 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7629 err = alloc_tracing_log_err(len);
7630 if (PTR_ERR(err) != -ENOMEM)
7631 tr->n_err_log_entries++;
7632
7633 return err;
7634 }
7635 cmd = kzalloc(len, GFP_KERNEL);
7636 if (!cmd)
7637 return ERR_PTR(-ENOMEM);
7638 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7639 kfree(err->cmd);
7640 err->cmd = cmd;
7641 list_del(&err->list);
7642
7643 return err;
7644 }
7645
7646 /**
7647 * err_pos - find the position of a string within a command for error careting
7648 * @cmd: The tracing command that caused the error
7649 * @str: The string to position the caret at within @cmd
7650 *
7651 * Finds the position of the first occurrence of @str within @cmd. The
7652 * return value can be passed to tracing_log_err() for caret placement
7653 * within @cmd.
7654 *
7655 * Returns the index within @cmd of the first occurrence of @str or 0
7656 * if @str was not found.
7657 */
7658 unsigned int err_pos(char *cmd, const char *str)
7659 {
7660 char *found;
7661
7662 if (WARN_ON(!strlen(cmd)))
7663 return 0;
7664
7665 found = strstr(cmd, str);
7666 if (found)
7667 return found - cmd;
7668
7669 return 0;
7670 }
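
/*
 * Worked example (illustrative): err_pos("hist:keys=foo", "foo") returns 10,
 * the offset of "foo" within the command, which can then be passed to
 * tracing_log_err() as @pos so the caret lines up under "foo".
 */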
7671
7672 /**
7673 * tracing_log_err - write an error to the tracing error log
7674 * @tr: The associated trace array for the error (NULL for top level array)
7675 * @loc: A string describing where the error occurred
7676 * @cmd: The tracing command that caused the error
7677 * @errs: The array of loc-specific static error strings
7678 * @type: The index into errs[], which produces the specific static err string
7679 * @pos: The position the caret should be placed in the cmd
7680 *
7681 * Writes an error into tracing/error_log of the form:
7682 *
7683 * <loc>: error: <text>
7684 * Command: <cmd>
7685 * ^
7686 *
7687 * tracing/error_log is a small log file containing the last
7688 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7689 * unless there has been a tracing error, and the error log can be
7690 * cleared and have its memory freed by writing the empty string in
7691 * truncation mode to it i.e. echo > tracing/error_log.
7692 *
7693 * NOTE: the @errs array along with the @type param are used to
7694 * produce a static error string - this string is not copied and saved
7695 * when the error is logged - only a pointer to it is saved. See
7696 * existing callers for examples of how static strings are typically
7697 * defined for use with tracing_log_err().
7698 */
7699 void tracing_log_err(struct trace_array *tr,
7700 const char *loc, const char *cmd,
7701 const char **errs, u8 type, u16 pos)
7702 {
7703 struct tracing_log_err *err;
7704 int len = 0;
7705
7706 if (!tr)
7707 tr = &global_trace;
7708
7709 len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7710
7711 guard(mutex)(&tracing_err_log_lock);
7712
7713 err = get_tracing_log_err(tr, len);
7714 if (PTR_ERR(err) == -ENOMEM)
7715 return;
7716
7717 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7718 snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7719
7720 err->info.errs = errs;
7721 err->info.type = type;
7722 err->info.pos = pos;
7723 err->info.ts = local_clock();
7724
7725 list_add_tail(&err->list, &tr->err_log);
7726 }
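
/*
 * Illustrative rendering (hypothetical error text and command): an entry
 * logged with a loc of "hist:sched:sched_switch", a static error string of
 * "Couldn't find field" and @pos pointing at the offending token shows up
 * in tracing/error_log roughly as:
 *
 *   [  117.263533] hist:sched:sched_switch: error: Couldn't find field
 *     Command: keys=bad_field
 *                   ^
 */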
7727
7728 static void clear_tracing_err_log(struct trace_array *tr)
7729 {
7730 struct tracing_log_err *err, *next;
7731
7732 mutex_lock(&tracing_err_log_lock);
7733 list_for_each_entry_safe(err, next, &tr->err_log, list) {
7734 list_del(&err->list);
7735 free_tracing_log_err(err);
7736 }
7737
7738 tr->n_err_log_entries = 0;
7739 mutex_unlock(&tracing_err_log_lock);
7740 }
7741
7742 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7743 {
7744 struct trace_array *tr = m->private;
7745
7746 mutex_lock(&tracing_err_log_lock);
7747
7748 return seq_list_start(&tr->err_log, *pos);
7749 }
7750
7751 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7752 {
7753 struct trace_array *tr = m->private;
7754
7755 return seq_list_next(v, &tr->err_log, pos);
7756 }
7757
7758 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7759 {
7760 mutex_unlock(&tracing_err_log_lock);
7761 }
7762
7763 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7764 {
7765 u16 i;
7766
7767 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7768 seq_putc(m, ' ');
7769 for (i = 0; i < pos; i++)
7770 seq_putc(m, ' ');
7771 seq_puts(m, "^\n");
7772 }
7773
7774 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7775 {
7776 struct tracing_log_err *err = v;
7777
7778 if (err) {
7779 const char *err_text = err->info.errs[err->info.type];
7780 u64 sec = err->info.ts;
7781 u32 nsec;
7782
7783 nsec = do_div(sec, NSEC_PER_SEC);
7784 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7785 err->loc, err_text);
7786 seq_printf(m, "%s", err->cmd);
7787 tracing_err_log_show_pos(m, err->info.pos);
7788 }
7789
7790 return 0;
7791 }
7792
7793 static const struct seq_operations tracing_err_log_seq_ops = {
7794 .start = tracing_err_log_seq_start,
7795 .next = tracing_err_log_seq_next,
7796 .stop = tracing_err_log_seq_stop,
7797 .show = tracing_err_log_seq_show
7798 };
7799
7800 static int tracing_err_log_open(struct inode *inode, struct file *file)
7801 {
7802 struct trace_array *tr = inode->i_private;
7803 int ret = 0;
7804
7805 ret = tracing_check_open_get_tr(tr);
7806 if (ret)
7807 return ret;
7808
7809 /* If this file was opened for write, then erase contents */
7810 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7811 clear_tracing_err_log(tr);
7812
7813 if (file->f_mode & FMODE_READ) {
7814 ret = seq_open(file, &tracing_err_log_seq_ops);
7815 if (!ret) {
7816 struct seq_file *m = file->private_data;
7817 m->private = tr;
7818 } else {
7819 trace_array_put(tr);
7820 }
7821 }
7822 return ret;
7823 }
7824
7825 static ssize_t tracing_err_log_write(struct file *file,
7826 const char __user *buffer,
7827 size_t count, loff_t *ppos)
7828 {
7829 return count;
7830 }
7831
7832 static int tracing_err_log_release(struct inode *inode, struct file *file)
7833 {
7834 struct trace_array *tr = inode->i_private;
7835
7836 trace_array_put(tr);
7837
7838 if (file->f_mode & FMODE_READ)
7839 seq_release(inode, file);
7840
7841 return 0;
7842 }
7843
7844 static const struct file_operations tracing_err_log_fops = {
7845 .open = tracing_err_log_open,
7846 .write = tracing_err_log_write,
7847 .read = seq_read,
7848 .llseek = tracing_lseek,
7849 .release = tracing_err_log_release,
7850 };
7851
7852 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7853 {
7854 struct trace_array *tr = inode->i_private;
7855 struct ftrace_buffer_info *info;
7856 int ret;
7857
7858 ret = tracing_check_open_get_tr(tr);
7859 if (ret)
7860 return ret;
7861
7862 info = kvzalloc(sizeof(*info), GFP_KERNEL);
7863 if (!info) {
7864 trace_array_put(tr);
7865 return -ENOMEM;
7866 }
7867
7868 mutex_lock(&trace_types_lock);
7869
7870 info->iter.tr = tr;
7871 info->iter.cpu_file = tracing_get_cpu(inode);
7872 info->iter.trace = tr->current_trace;
7873 info->iter.array_buffer = &tr->array_buffer;
7874 info->spare = NULL;
7875 /* Force reading ring buffer for first read */
7876 info->read = (unsigned int)-1;
7877
7878 filp->private_data = info;
7879
7880 tr->trace_ref++;
7881
7882 mutex_unlock(&trace_types_lock);
7883
7884 ret = nonseekable_open(inode, filp);
7885 if (ret < 0)
7886 trace_array_put(tr);
7887
7888 return ret;
7889 }
7890
7891 static __poll_t
7892 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7893 {
7894 struct ftrace_buffer_info *info = filp->private_data;
7895 struct trace_iterator *iter = &info->iter;
7896
7897 return trace_poll(iter, filp, poll_table);
7898 }
7899
7900 static ssize_t
7901 tracing_buffers_read(struct file *filp, char __user *ubuf,
7902 size_t count, loff_t *ppos)
7903 {
7904 struct ftrace_buffer_info *info = filp->private_data;
7905 struct trace_iterator *iter = &info->iter;
7906 void *trace_data;
7907 int page_size;
7908 ssize_t ret = 0;
7909 ssize_t size;
7910
7911 if (!count)
7912 return 0;
7913
7914 #ifdef CONFIG_TRACER_MAX_TRACE
7915 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7916 return -EBUSY;
7917 #endif
7918
7919 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
7920
7921 /* Make sure the spare matches the current sub buffer size */
7922 if (info->spare) {
7923 if (page_size != info->spare_size) {
7924 ring_buffer_free_read_page(iter->array_buffer->buffer,
7925 info->spare_cpu, info->spare);
7926 info->spare = NULL;
7927 }
7928 }
7929
7930 if (!info->spare) {
7931 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7932 iter->cpu_file);
7933 if (IS_ERR(info->spare)) {
7934 ret = PTR_ERR(info->spare);
7935 info->spare = NULL;
7936 } else {
7937 info->spare_cpu = iter->cpu_file;
7938 info->spare_size = page_size;
7939 }
7940 }
7941 if (!info->spare)
7942 return ret;
7943
7944 /* Do we have previous read data to read? */
7945 if (info->read < page_size)
7946 goto read;
7947
7948 again:
7949 trace_access_lock(iter->cpu_file);
7950 ret = ring_buffer_read_page(iter->array_buffer->buffer,
7951 info->spare,
7952 count,
7953 iter->cpu_file, 0);
7954 trace_access_unlock(iter->cpu_file);
7955
7956 if (ret < 0) {
7957 if (trace_empty(iter) && !iter->closed) {
7958 if ((filp->f_flags & O_NONBLOCK))
7959 return -EAGAIN;
7960
7961 ret = wait_on_pipe(iter, 0);
7962 if (ret)
7963 return ret;
7964
7965 goto again;
7966 }
7967 return 0;
7968 }
7969
7970 info->read = 0;
7971 read:
7972 size = page_size - info->read;
7973 if (size > count)
7974 size = count;
7975 trace_data = ring_buffer_read_page_data(info->spare);
7976 ret = copy_to_user(ubuf, trace_data + info->read, size);
7977 if (ret == size)
7978 return -EFAULT;
7979
7980 size -= ret;
7981
7982 *ppos += size;
7983 info->read += size;
7984
7985 return size;
7986 }
7987
7988 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
7989 {
7990 struct ftrace_buffer_info *info = file->private_data;
7991 struct trace_iterator *iter = &info->iter;
7992
7993 iter->closed = true;
7994 /* Make sure the waiters see the new wait_index */
7995 (void)atomic_fetch_inc_release(&iter->wait_index);
7996
7997 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
7998
7999 return 0;
8000 }
8001
8002 static int tracing_buffers_release(struct inode *inode, struct file *file)
8003 {
8004 struct ftrace_buffer_info *info = file->private_data;
8005 struct trace_iterator *iter = &info->iter;
8006
8007 mutex_lock(&trace_types_lock);
8008
8009 iter->tr->trace_ref--;
8010
8011 __trace_array_put(iter->tr);
8012
8013 if (info->spare)
8014 ring_buffer_free_read_page(iter->array_buffer->buffer,
8015 info->spare_cpu, info->spare);
8016 kvfree(info);
8017
8018 mutex_unlock(&trace_types_lock);
8019
8020 return 0;
8021 }
8022
8023 struct buffer_ref {
8024 struct trace_buffer *buffer;
8025 void *page;
8026 int cpu;
8027 refcount_t refcount;
8028 };
8029
8030 static void buffer_ref_release(struct buffer_ref *ref)
8031 {
8032 if (!refcount_dec_and_test(&ref->refcount))
8033 return;
8034 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8035 kfree(ref);
8036 }
8037
8038 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8039 struct pipe_buffer *buf)
8040 {
8041 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8042
8043 buffer_ref_release(ref);
8044 buf->private = 0;
8045 }
8046
8047 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8048 struct pipe_buffer *buf)
8049 {
8050 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8051
8052 if (refcount_read(&ref->refcount) > INT_MAX/2)
8053 return false;
8054
8055 refcount_inc(&ref->refcount);
8056 return true;
8057 }
8058
8059 /* Pipe buffer operations for a buffer. */
8060 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8061 .release = buffer_pipe_buf_release,
8062 .get = buffer_pipe_buf_get,
8063 };
8064
8065 /*
8066 * Callback from splice_to_pipe(), if we need to release some pages
8067 * at the end of the spd in case we errored out while filling the pipe.
8068 */
8069 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8070 {
8071 struct buffer_ref *ref =
8072 (struct buffer_ref *)spd->partial[i].private;
8073
8074 buffer_ref_release(ref);
8075 spd->partial[i].private = 0;
8076 }
8077
8078 static ssize_t
8079 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8080 struct pipe_inode_info *pipe, size_t len,
8081 unsigned int flags)
8082 {
8083 struct ftrace_buffer_info *info = file->private_data;
8084 struct trace_iterator *iter = &info->iter;
8085 struct partial_page partial_def[PIPE_DEF_BUFFERS];
8086 struct page *pages_def[PIPE_DEF_BUFFERS];
8087 struct splice_pipe_desc spd = {
8088 .pages = pages_def,
8089 .partial = partial_def,
8090 .nr_pages_max = PIPE_DEF_BUFFERS,
8091 .ops = &buffer_pipe_buf_ops,
8092 .spd_release = buffer_spd_release,
8093 };
8094 struct buffer_ref *ref;
8095 bool woken = false;
8096 int page_size;
8097 int entries, i;
8098 ssize_t ret = 0;
8099
8100 #ifdef CONFIG_TRACER_MAX_TRACE
8101 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8102 return -EBUSY;
8103 #endif
8104
8105 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8106 if (*ppos & (page_size - 1))
8107 return -EINVAL;
8108
8109 if (len & (page_size - 1)) {
8110 if (len < page_size)
8111 return -EINVAL;
8112 len &= (~(page_size - 1));
8113 }
8114
8115 if (splice_grow_spd(pipe, &spd))
8116 return -ENOMEM;
8117
8118 again:
8119 trace_access_lock(iter->cpu_file);
8120 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8121
8122 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8123 struct page *page;
8124 int r;
8125
8126 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8127 if (!ref) {
8128 ret = -ENOMEM;
8129 break;
8130 }
8131
8132 refcount_set(&ref->refcount, 1);
8133 ref->buffer = iter->array_buffer->buffer;
8134 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8135 if (IS_ERR(ref->page)) {
8136 ret = PTR_ERR(ref->page);
8137 ref->page = NULL;
8138 kfree(ref);
8139 break;
8140 }
8141 ref->cpu = iter->cpu_file;
8142
8143 r = ring_buffer_read_page(ref->buffer, ref->page,
8144 len, iter->cpu_file, 1);
8145 if (r < 0) {
8146 ring_buffer_free_read_page(ref->buffer, ref->cpu,
8147 ref->page);
8148 kfree(ref);
8149 break;
8150 }
8151
8152 page = virt_to_page(ring_buffer_read_page_data(ref->page));
8153
8154 spd.pages[i] = page;
8155 spd.partial[i].len = page_size;
8156 spd.partial[i].offset = 0;
8157 spd.partial[i].private = (unsigned long)ref;
8158 spd.nr_pages++;
8159 *ppos += page_size;
8160
8161 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8162 }
8163
8164 trace_access_unlock(iter->cpu_file);
8165 spd.nr_pages = i;
8166
8167 /* did we read anything? */
8168 if (!spd.nr_pages) {
8169
8170 if (ret)
8171 goto out;
8172
8173 if (woken)
8174 goto out;
8175
8176 ret = -EAGAIN;
8177 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8178 goto out;
8179
8180 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8181 if (ret)
8182 goto out;
8183
8184 /* No need to wait after waking up when tracing is off */
8185 if (!tracer_tracing_is_on(iter->tr))
8186 goto out;
8187
8188 /* Iterate one more time to collect any new data then exit */
8189 woken = true;
8190
8191 goto again;
8192 }
8193
8194 ret = splice_to_pipe(pipe, &spd);
8195 out:
8196 splice_shrink_spd(&spd);
8197
8198 return ret;
8199 }
8200
8201 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8202 {
8203 struct ftrace_buffer_info *info = file->private_data;
8204 struct trace_iterator *iter = &info->iter;
8205 int err;
8206
8207 if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8208 if (!(file->f_flags & O_NONBLOCK)) {
8209 err = ring_buffer_wait(iter->array_buffer->buffer,
8210 iter->cpu_file,
8211 iter->tr->buffer_percent,
8212 NULL, NULL);
8213 if (err)
8214 return err;
8215 }
8216
8217 return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8218 iter->cpu_file);
8219 } else if (cmd) {
8220 return -ENOTTY;
8221 }
8222
8223 /*
8224 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8225 * waiters
8226 */
8227 mutex_lock(&trace_types_lock);
8228
8229 /* Make sure the waiters see the new wait_index */
8230 (void)atomic_fetch_inc_release(&iter->wait_index);
8231
8232 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8233
8234 mutex_unlock(&trace_types_lock);
8235 return 0;
8236 }
8237
8238 #ifdef CONFIG_TRACER_MAX_TRACE
8239 static int get_snapshot_map(struct trace_array *tr)
8240 {
8241 int err = 0;
8242
8243 /*
8244 * Called with mmap_lock held. lockdep would be unhappy if we were to
8245 * take trace_types_lock here. Instead use the specific
8246 * snapshot_trigger_lock.
8247 */
8248 spin_lock(&tr->snapshot_trigger_lock);
8249
8250 if (tr->snapshot || tr->mapped == UINT_MAX)
8251 err = -EBUSY;
8252 else
8253 tr->mapped++;
8254
8255 spin_unlock(&tr->snapshot_trigger_lock);
8256
8257 /* Wait for update_max_tr() to observe iter->tr->mapped */
8258 if (tr->mapped == 1)
8259 synchronize_rcu();
8260
8261 return err;
8262
8263 }
8264 static void put_snapshot_map(struct trace_array *tr)
8265 {
8266 spin_lock(&tr->snapshot_trigger_lock);
8267 if (!WARN_ON(!tr->mapped))
8268 tr->mapped--;
8269 spin_unlock(&tr->snapshot_trigger_lock);
8270 }
8271 #else
8272 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8273 static inline void put_snapshot_map(struct trace_array *tr) { }
8274 #endif
8275
8276 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8277 {
8278 struct ftrace_buffer_info *info = vma->vm_file->private_data;
8279 struct trace_iterator *iter = &info->iter;
8280
8281 WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8282 put_snapshot_map(iter->tr);
8283 }
8284
8285 static const struct vm_operations_struct tracing_buffers_vmops = {
8286 .close = tracing_buffers_mmap_close,
8287 };
8288
8289 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8290 {
8291 struct ftrace_buffer_info *info = filp->private_data;
8292 struct trace_iterator *iter = &info->iter;
8293 int ret = 0;
8294
8295 /* Currently the boot mapped buffer is not supported for mmap */
8296 if (iter->tr->flags & TRACE_ARRAY_FL_BOOT)
8297 return -ENODEV;
8298
8299 ret = get_snapshot_map(iter->tr);
8300 if (ret)
8301 return ret;
8302
8303 ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8304 if (ret)
8305 put_snapshot_map(iter->tr);
8306
8307 vma->vm_ops = &tracing_buffers_vmops;
8308
8309 return ret;
8310 }
8311
8312 static const struct file_operations tracing_buffers_fops = {
8313 .open = tracing_buffers_open,
8314 .read = tracing_buffers_read,
8315 .poll = tracing_buffers_poll,
8316 .release = tracing_buffers_release,
8317 .flush = tracing_buffers_flush,
8318 .splice_read = tracing_buffers_splice_read,
8319 .unlocked_ioctl = tracing_buffers_ioctl,
8320 .mmap = tracing_buffers_mmap,
8321 };
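
/*
 * Rough user-space sketch of the mmap interface exposed above, assuming the
 * memory-mapped ring buffer ABI (Documentation/trace/ring-buffer-map.rst);
 * paths, sizes and the surrounding buffer handling are illustrative only:
 *
 *   int fd = open("per_cpu/cpu0/trace_pipe_raw", O_RDONLY | O_NONBLOCK);
 *   void *meta = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
 *   ...
 *   ioctl(fd, TRACE_MMAP_IOCTL_GET_READER);   // advance the reader page
 */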
8322
8323 static ssize_t
8324 tracing_stats_read(struct file *filp, char __user *ubuf,
8325 size_t count, loff_t *ppos)
8326 {
8327 struct inode *inode = file_inode(filp);
8328 struct trace_array *tr = inode->i_private;
8329 struct array_buffer *trace_buf = &tr->array_buffer;
8330 int cpu = tracing_get_cpu(inode);
8331 struct trace_seq *s;
8332 unsigned long cnt;
8333 unsigned long long t;
8334 unsigned long usec_rem;
8335
8336 s = kmalloc(sizeof(*s), GFP_KERNEL);
8337 if (!s)
8338 return -ENOMEM;
8339
8340 trace_seq_init(s);
8341
8342 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8343 trace_seq_printf(s, "entries: %ld\n", cnt);
8344
8345 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8346 trace_seq_printf(s, "overrun: %ld\n", cnt);
8347
8348 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8349 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8350
8351 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8352 trace_seq_printf(s, "bytes: %ld\n", cnt);
8353
8354 if (trace_clocks[tr->clock_id].in_ns) {
8355 /* local or global for trace_clock */
8356 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8357 usec_rem = do_div(t, USEC_PER_SEC);
8358 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8359 t, usec_rem);
8360
8361 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8362 usec_rem = do_div(t, USEC_PER_SEC);
8363 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8364 } else {
8365 /* counter or tsc mode for trace_clock */
8366 trace_seq_printf(s, "oldest event ts: %llu\n",
8367 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8368
8369 trace_seq_printf(s, "now ts: %llu\n",
8370 ring_buffer_time_stamp(trace_buf->buffer));
8371 }
8372
8373 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8374 trace_seq_printf(s, "dropped events: %ld\n", cnt);
8375
8376 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8377 trace_seq_printf(s, "read events: %ld\n", cnt);
8378
8379 count = simple_read_from_buffer(ubuf, count, ppos,
8380 s->buffer, trace_seq_used(s));
8381
8382 kfree(s);
8383
8384 return count;
8385 }
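
/*
 * Example of what the per_cpu/cpuN/stats file produced above might look
 * like with a nanosecond-based clock (the values are illustrative):
 *
 *   entries: 129
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 7724
 *   oldest event ts:  2296.769464
 *   now ts:  2323.456543
 *   dropped events: 0
 *   read events: 129
 */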
8386
8387 static const struct file_operations tracing_stats_fops = {
8388 .open = tracing_open_generic_tr,
8389 .read = tracing_stats_read,
8390 .llseek = generic_file_llseek,
8391 .release = tracing_release_generic_tr,
8392 };
8393
8394 #ifdef CONFIG_DYNAMIC_FTRACE
8395
8396 static ssize_t
8397 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8398 size_t cnt, loff_t *ppos)
8399 {
8400 ssize_t ret;
8401 char *buf;
8402 int r;
8403
8404 /* 256 should be plenty to hold the amount needed */
8405 buf = kmalloc(256, GFP_KERNEL);
8406 if (!buf)
8407 return -ENOMEM;
8408
8409 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8410 ftrace_update_tot_cnt,
8411 ftrace_number_of_pages,
8412 ftrace_number_of_groups);
8413
8414 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8415 kfree(buf);
8416 return ret;
8417 }
8418
8419 static const struct file_operations tracing_dyn_info_fops = {
8420 .open = tracing_open_generic,
8421 .read = tracing_read_dyn_info,
8422 .llseek = generic_file_llseek,
8423 };
8424 #endif /* CONFIG_DYNAMIC_FTRACE */
8425
8426 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8427 static void
8428 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8429 struct trace_array *tr, struct ftrace_probe_ops *ops,
8430 void *data)
8431 {
8432 tracing_snapshot_instance(tr);
8433 }
8434
8435 static void
8436 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8437 struct trace_array *tr, struct ftrace_probe_ops *ops,
8438 void *data)
8439 {
8440 struct ftrace_func_mapper *mapper = data;
8441 long *count = NULL;
8442
8443 if (mapper)
8444 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8445
8446 if (count) {
8447
8448 if (*count <= 0)
8449 return;
8450
8451 (*count)--;
8452 }
8453
8454 tracing_snapshot_instance(tr);
8455 }
8456
8457 static int
8458 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8459 struct ftrace_probe_ops *ops, void *data)
8460 {
8461 struct ftrace_func_mapper *mapper = data;
8462 long *count = NULL;
8463
8464 seq_printf(m, "%ps:", (void *)ip);
8465
8466 seq_puts(m, "snapshot");
8467
8468 if (mapper)
8469 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8470
8471 if (count)
8472 seq_printf(m, ":count=%ld\n", *count);
8473 else
8474 seq_puts(m, ":unlimited\n");
8475
8476 return 0;
8477 }
8478
8479 static int
8480 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8481 unsigned long ip, void *init_data, void **data)
8482 {
8483 struct ftrace_func_mapper *mapper = *data;
8484
8485 if (!mapper) {
8486 mapper = allocate_ftrace_func_mapper();
8487 if (!mapper)
8488 return -ENOMEM;
8489 *data = mapper;
8490 }
8491
8492 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8493 }
8494
8495 static void
8496 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8497 unsigned long ip, void *data)
8498 {
8499 struct ftrace_func_mapper *mapper = data;
8500
8501 if (!ip) {
8502 if (!mapper)
8503 return;
8504 free_ftrace_func_mapper(mapper, NULL);
8505 return;
8506 }
8507
8508 ftrace_func_mapper_remove_ip(mapper, ip);
8509 }
8510
8511 static struct ftrace_probe_ops snapshot_probe_ops = {
8512 .func = ftrace_snapshot,
8513 .print = ftrace_snapshot_print,
8514 };
8515
8516 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8517 .func = ftrace_count_snapshot,
8518 .print = ftrace_snapshot_print,
8519 .init = ftrace_snapshot_init,
8520 .free = ftrace_snapshot_free,
8521 };
8522
8523 static int
8524 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8525 char *glob, char *cmd, char *param, int enable)
8526 {
8527 struct ftrace_probe_ops *ops;
8528 void *count = (void *)-1;
8529 char *number;
8530 int ret;
8531
8532 if (!tr)
8533 return -ENODEV;
8534
8535 /* hash funcs only work with set_ftrace_filter */
8536 if (!enable)
8537 return -EINVAL;
8538
8539 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8540
8541 if (glob[0] == '!') {
8542 ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8543 if (!ret)
8544 tracing_disarm_snapshot(tr);
8545
8546 return ret;
8547 }
8548
8549 if (!param)
8550 goto out_reg;
8551
8552 number = strsep(&param, ":");
8553
8554 if (!strlen(number))
8555 goto out_reg;
8556
8557 /*
8558 * We use the callback data field (which is a pointer)
8559 * as our counter.
8560 */
8561 ret = kstrtoul(number, 0, (unsigned long *)&count);
8562 if (ret)
8563 return ret;
8564
8565 out_reg:
8566 ret = tracing_arm_snapshot(tr);
8567 if (ret < 0)
8568 return ret;
8569
8570 ret = register_ftrace_function_probe(glob, tr, ops, count);
8571 if (ret < 0)
8572 tracing_disarm_snapshot(tr);
8573
8574 return ret < 0 ? ret : 0;
8575 }
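
/*
 * Illustrative use of the "snapshot" function command handled above and
 * registered below, via set_ftrace_filter (assumes function tracing is
 * available; "schedule" is just an example function):
 *
 *   # echo 'schedule:snapshot' > set_ftrace_filter     - snapshot on every hit
 *   # echo 'schedule:snapshot:5' > set_ftrace_filter   - only the first 5 hits
 *   # echo '!schedule:snapshot' > set_ftrace_filter    - remove the probe
 */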
8576
8577 static struct ftrace_func_command ftrace_snapshot_cmd = {
8578 .name = "snapshot",
8579 .func = ftrace_trace_snapshot_callback,
8580 };
8581
8582 static __init int register_snapshot_cmd(void)
8583 {
8584 return register_ftrace_command(&ftrace_snapshot_cmd);
8585 }
8586 #else
8587 static inline __init int register_snapshot_cmd(void) { return 0; }
8588 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8589
8590 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8591 {
8592 if (WARN_ON(!tr->dir))
8593 return ERR_PTR(-ENODEV);
8594
8595 /* Top directory uses NULL as the parent */
8596 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8597 return NULL;
8598
8599 /* All sub buffers have a descriptor */
8600 return tr->dir;
8601 }
8602
8603 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8604 {
8605 struct dentry *d_tracer;
8606
8607 if (tr->percpu_dir)
8608 return tr->percpu_dir;
8609
8610 d_tracer = tracing_get_dentry(tr);
8611 if (IS_ERR(d_tracer))
8612 return NULL;
8613
8614 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8615
8616 MEM_FAIL(!tr->percpu_dir,
8617 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8618
8619 return tr->percpu_dir;
8620 }
8621
8622 static struct dentry *
8623 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8624 void *data, long cpu, const struct file_operations *fops)
8625 {
8626 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8627
8628 if (ret) /* See tracing_get_cpu() */
8629 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8630 return ret;
8631 }
8632
8633 static void
8634 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8635 {
8636 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8637 struct dentry *d_cpu;
8638 char cpu_dir[30]; /* 30 characters should be more than enough */
8639
8640 if (!d_percpu)
8641 return;
8642
8643 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8644 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8645 if (!d_cpu) {
8646 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8647 return;
8648 }
8649
8650 /* per cpu trace_pipe */
8651 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8652 tr, cpu, &tracing_pipe_fops);
8653
8654 /* per cpu trace */
8655 trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8656 tr, cpu, &tracing_fops);
8657
8658 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8659 tr, cpu, &tracing_buffers_fops);
8660
8661 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8662 tr, cpu, &tracing_stats_fops);
8663
8664 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8665 tr, cpu, &tracing_entries_fops);
8666
8667 if (tr->range_addr_start)
8668 trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
8669 tr, cpu, &tracing_buffer_meta_fops);
8670 #ifdef CONFIG_TRACER_SNAPSHOT
8671 if (!tr->range_addr_start) {
8672 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8673 tr, cpu, &snapshot_fops);
8674
8675 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8676 tr, cpu, &snapshot_raw_fops);
8677 }
8678 #endif
8679 }
8680
8681 #ifdef CONFIG_FTRACE_SELFTEST
8682 /* Let selftest have access to static functions in this file */
8683 #include "trace_selftest.c"
8684 #endif
8685
8686 static ssize_t
8687 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8688 loff_t *ppos)
8689 {
8690 struct trace_option_dentry *topt = filp->private_data;
8691 char *buf;
8692
8693 if (topt->flags->val & topt->opt->bit)
8694 buf = "1\n";
8695 else
8696 buf = "0\n";
8697
8698 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8699 }
8700
8701 static ssize_t
8702 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8703 loff_t *ppos)
8704 {
8705 struct trace_option_dentry *topt = filp->private_data;
8706 unsigned long val;
8707 int ret;
8708
8709 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8710 if (ret)
8711 return ret;
8712
8713 if (val != 0 && val != 1)
8714 return -EINVAL;
8715
8716 if (!!(topt->flags->val & topt->opt->bit) != val) {
8717 mutex_lock(&trace_types_lock);
8718 ret = __set_tracer_option(topt->tr, topt->flags,
8719 topt->opt, !val);
8720 mutex_unlock(&trace_types_lock);
8721 if (ret)
8722 return ret;
8723 }
8724
8725 *ppos += cnt;
8726
8727 return cnt;
8728 }
8729
8730 static int tracing_open_options(struct inode *inode, struct file *filp)
8731 {
8732 struct trace_option_dentry *topt = inode->i_private;
8733 int ret;
8734
8735 ret = tracing_check_open_get_tr(topt->tr);
8736 if (ret)
8737 return ret;
8738
8739 filp->private_data = inode->i_private;
8740 return 0;
8741 }
8742
8743 static int tracing_release_options(struct inode *inode, struct file *file)
8744 {
8745 struct trace_option_dentry *topt = file->private_data;
8746
8747 trace_array_put(topt->tr);
8748 return 0;
8749 }
8750
8751 static const struct file_operations trace_options_fops = {
8752 .open = tracing_open_options,
8753 .read = trace_options_read,
8754 .write = trace_options_write,
8755 .llseek = generic_file_llseek,
8756 .release = tracing_release_options,
8757 };
8758
8759 /*
8760 * In order to pass in both the trace_array descriptor as well as the index
8761 * to the flag that the trace option file represents, the trace_array
8762 * has a character array of trace_flags_index[], which holds the index
8763 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8764 * The address of this character array is passed to the flag option file
8765 * read/write callbacks.
8766 *
8767 * In order to extract both the index and the trace_array descriptor,
8768 * get_tr_index() uses the following algorithm.
8769 *
8770 * idx = *ptr;
8771 *
8772 * As the pointer itself contains the address of the index (remember
8773 * index[1] == 1).
8774 *
8775 * Then to get the trace_array descriptor, by subtracting that index
8776 * from the ptr, we get to the start of the index itself.
8777 *
8778 * ptr - idx == &index[0]
8779 *
8780 * Then a simple container_of() from that pointer gets us to the
8781 * trace_array descriptor.
8782 */
8783 static void get_tr_index(void *data, struct trace_array **ptr,
8784 unsigned int *pindex)
8785 {
8786 *pindex = *(unsigned char *)data;
8787
8788 *ptr = container_of(data - *pindex, struct trace_array,
8789 trace_flags_index);
8790 }
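
/*
 * Worked example of the arithmetic described above: if @data points at
 * tr->trace_flags_index[3], then *(unsigned char *)data == 3, so
 * data - 3 == &tr->trace_flags_index[0], and container_of() on that
 * address recovers the enclosing trace_array.
 */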
8791
8792 static ssize_t
8793 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8794 loff_t *ppos)
8795 {
8796 void *tr_index = filp->private_data;
8797 struct trace_array *tr;
8798 unsigned int index;
8799 char *buf;
8800
8801 get_tr_index(tr_index, &tr, &index);
8802
8803 if (tr->trace_flags & (1 << index))
8804 buf = "1\n";
8805 else
8806 buf = "0\n";
8807
8808 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8809 }
8810
8811 static ssize_t
8812 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8813 loff_t *ppos)
8814 {
8815 void *tr_index = filp->private_data;
8816 struct trace_array *tr;
8817 unsigned int index;
8818 unsigned long val;
8819 int ret;
8820
8821 get_tr_index(tr_index, &tr, &index);
8822
8823 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8824 if (ret)
8825 return ret;
8826
8827 if (val != 0 && val != 1)
8828 return -EINVAL;
8829
8830 mutex_lock(&event_mutex);
8831 mutex_lock(&trace_types_lock);
8832 ret = set_tracer_flag(tr, 1 << index, val);
8833 mutex_unlock(&trace_types_lock);
8834 mutex_unlock(&event_mutex);
8835
8836 if (ret < 0)
8837 return ret;
8838
8839 *ppos += cnt;
8840
8841 return cnt;
8842 }
8843
8844 static const struct file_operations trace_options_core_fops = {
8845 .open = tracing_open_generic,
8846 .read = trace_options_core_read,
8847 .write = trace_options_core_write,
8848 .llseek = generic_file_llseek,
8849 };
8850
8851 struct dentry *trace_create_file(const char *name,
8852 umode_t mode,
8853 struct dentry *parent,
8854 void *data,
8855 const struct file_operations *fops)
8856 {
8857 struct dentry *ret;
8858
8859 ret = tracefs_create_file(name, mode, parent, data, fops);
8860 if (!ret)
8861 pr_warn("Could not create tracefs '%s' entry\n", name);
8862
8863 return ret;
8864 }
8865
8866
8867 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8868 {
8869 struct dentry *d_tracer;
8870
8871 if (tr->options)
8872 return tr->options;
8873
8874 d_tracer = tracing_get_dentry(tr);
8875 if (IS_ERR(d_tracer))
8876 return NULL;
8877
8878 tr->options = tracefs_create_dir("options", d_tracer);
8879 if (!tr->options) {
8880 pr_warn("Could not create tracefs directory 'options'\n");
8881 return NULL;
8882 }
8883
8884 return tr->options;
8885 }
8886
8887 static void
8888 create_trace_option_file(struct trace_array *tr,
8889 struct trace_option_dentry *topt,
8890 struct tracer_flags *flags,
8891 struct tracer_opt *opt)
8892 {
8893 struct dentry *t_options;
8894
8895 t_options = trace_options_init_dentry(tr);
8896 if (!t_options)
8897 return;
8898
8899 topt->flags = flags;
8900 topt->opt = opt;
8901 topt->tr = tr;
8902
8903 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8904 t_options, topt, &trace_options_fops);
8905
8906 }
8907
8908 static void
8909 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8910 {
8911 struct trace_option_dentry *topts;
8912 struct trace_options *tr_topts;
8913 struct tracer_flags *flags;
8914 struct tracer_opt *opts;
8915 int cnt;
8916 int i;
8917
8918 if (!tracer)
8919 return;
8920
8921 flags = tracer->flags;
8922
8923 if (!flags || !flags->opts)
8924 return;
8925
8926 /*
8927 * If this is an instance, only create flags for tracers
8928 * the instance may have.
8929 */
8930 if (!trace_ok_for_array(tracer, tr))
8931 return;
8932
8933 for (i = 0; i < tr->nr_topts; i++) {
8934 /* Make sure there are no duplicate flags. */
8935 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8936 return;
8937 }
8938
8939 opts = flags->opts;
8940
8941 for (cnt = 0; opts[cnt].name; cnt++)
8942 ;
8943
8944 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8945 if (!topts)
8946 return;
8947
8948 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8949 GFP_KERNEL);
8950 if (!tr_topts) {
8951 kfree(topts);
8952 return;
8953 }
8954
8955 tr->topts = tr_topts;
8956 tr->topts[tr->nr_topts].tracer = tracer;
8957 tr->topts[tr->nr_topts].topts = topts;
8958 tr->nr_topts++;
8959
8960 for (cnt = 0; opts[cnt].name; cnt++) {
8961 create_trace_option_file(tr, &topts[cnt], flags,
8962 &opts[cnt]);
8963 MEM_FAIL(topts[cnt].entry == NULL,
8964 "Failed to create trace option: %s",
8965 opts[cnt].name);
8966 }
8967 }
8968
8969 static struct dentry *
8970 create_trace_option_core_file(struct trace_array *tr,
8971 const char *option, long index)
8972 {
8973 struct dentry *t_options;
8974
8975 t_options = trace_options_init_dentry(tr);
8976 if (!t_options)
8977 return NULL;
8978
8979 return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8980 (void *)&tr->trace_flags_index[index],
8981 &trace_options_core_fops);
8982 }
8983
8984 static void create_trace_options_dir(struct trace_array *tr)
8985 {
8986 struct dentry *t_options;
8987 bool top_level = tr == &global_trace;
8988 int i;
8989
8990 t_options = trace_options_init_dentry(tr);
8991 if (!t_options)
8992 return;
8993
8994 for (i = 0; trace_options[i]; i++) {
8995 if (top_level ||
8996 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8997 create_trace_option_core_file(tr, trace_options[i], i);
8998 }
8999 }
9000
9001 static ssize_t
9002 rb_simple_read(struct file *filp, char __user *ubuf,
9003 size_t cnt, loff_t *ppos)
9004 {
9005 struct trace_array *tr = filp->private_data;
9006 char buf[64];
9007 int r;
9008
9009 r = tracer_tracing_is_on(tr);
9010 r = sprintf(buf, "%d\n", r);
9011
9012 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9013 }
9014
9015 static ssize_t
9016 rb_simple_write(struct file *filp, const char __user *ubuf,
9017 size_t cnt, loff_t *ppos)
9018 {
9019 struct trace_array *tr = filp->private_data;
9020 struct trace_buffer *buffer = tr->array_buffer.buffer;
9021 unsigned long val;
9022 int ret;
9023
9024 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9025 if (ret)
9026 return ret;
9027
9028 if (buffer) {
9029 mutex_lock(&trace_types_lock);
9030 if (!!val == tracer_tracing_is_on(tr)) {
9031 val = 0; /* do nothing */
9032 } else if (val) {
9033 tracer_tracing_on(tr);
9034 if (tr->current_trace->start)
9035 tr->current_trace->start(tr);
9036 } else {
9037 tracer_tracing_off(tr);
9038 if (tr->current_trace->stop)
9039 tr->current_trace->stop(tr);
9040 /* Wake up any waiters */
9041 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9042 }
9043 mutex_unlock(&trace_types_lock);
9044 }
9045
9046 (*ppos)++;
9047
9048 return cnt;
9049 }
9050
9051 static const struct file_operations rb_simple_fops = {
9052 .open = tracing_open_generic_tr,
9053 .read = rb_simple_read,
9054 .write = rb_simple_write,
9055 .release = tracing_release_generic_tr,
9056 .llseek = default_llseek,
9057 };
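
/*
 * Usage sketch (rb_simple_fops typically backs the per-instance "tracing_on"
 * file; illustrative only):
 *
 *   # echo 0 > tracing_on   - stop recording into the ring buffer
 *   # echo 1 > tracing_on   - resume recording
 *
 * Writing the state the buffer already has is a no-op, since the write
 * handler above checks tracer_tracing_is_on() first.
 */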
9058
9059 static ssize_t
9060 buffer_percent_read(struct file *filp, char __user *ubuf,
9061 size_t cnt, loff_t *ppos)
9062 {
9063 struct trace_array *tr = filp->private_data;
9064 char buf[64];
9065 int r;
9066
9067 r = tr->buffer_percent;
9068 r = sprintf(buf, "%d\n", r);
9069
9070 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9071 }
9072
9073 static ssize_t
9074 buffer_percent_write(struct file *filp, const char __user *ubuf,
9075 size_t cnt, loff_t *ppos)
9076 {
9077 struct trace_array *tr = filp->private_data;
9078 unsigned long val;
9079 int ret;
9080
9081 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9082 if (ret)
9083 return ret;
9084
9085 if (val > 100)
9086 return -EINVAL;
9087
9088 tr->buffer_percent = val;
9089
9090 (*ppos)++;
9091
9092 return cnt;
9093 }
9094
9095 static const struct file_operations buffer_percent_fops = {
9096 .open = tracing_open_generic_tr,
9097 .read = buffer_percent_read,
9098 .write = buffer_percent_write,
9099 .release = tracing_release_generic_tr,
9100 .llseek = default_llseek,
9101 };
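
/*
 * Usage sketch for the "buffer_percent" file backed by the fops above
 * (illustrative): the value is the fill level blocked readers wait for
 * before being woken, e.g.
 *
 *   # echo 0 > buffer_percent     - wake readers as soon as any data exists
 *   # echo 50 > buffer_percent    - wake readers when the buffer is half full
 *   # echo 100 > buffer_percent   - wake readers only when the buffer is full
 */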
9102
9103 static ssize_t
9104 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9105 {
9106 struct trace_array *tr = filp->private_data;
9107 size_t size;
9108 char buf[64];
9109 int order;
9110 int r;
9111
9112 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9113 size = (PAGE_SIZE << order) / 1024;
9114
9115 r = sprintf(buf, "%zd\n", size);
9116
9117 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9118 }
9119
9120 static ssize_t
9121 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9122 size_t cnt, loff_t *ppos)
9123 {
9124 struct trace_array *tr = filp->private_data;
9125 unsigned long val;
9126 int old_order;
9127 int order;
9128 int pages;
9129 int ret;
9130
9131 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9132 if (ret)
9133 return ret;
9134
9135 val *= 1024; /* value passed in is in KB */
9136
9137 pages = DIV_ROUND_UP(val, PAGE_SIZE);
9138 order = fls(pages - 1);
9139
9140 /* limit between 1 and 128 system pages */
9141 if (order < 0 || order > 7)
9142 return -EINVAL;
9143
9144 /* Do not allow tracing while changing the order of the ring buffer */
9145 tracing_stop_tr(tr);
9146
9147 old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9148 if (old_order == order)
9149 goto out;
9150
9151 ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9152 if (ret)
9153 goto out;
9154
9155 #ifdef CONFIG_TRACER_MAX_TRACE
9156
9157 if (!tr->allocated_snapshot)
9158 goto out_max;
9159
9160 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9161 if (ret) {
9162 /* Put back the old order */
9163 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9164 if (WARN_ON_ONCE(cnt)) {
9165 /*
9166 * AARGH! We are left with different orders!
9167 * The max buffer is our "snapshot" buffer.
9168 * When a tracer needs a snapshot (one of the
9169 * latency tracers), it swaps the max buffer
9170 * with the saved snapshot. We succeeded in
9171 * updating the order of the main buffer, but failed to
9172 * update the order of the max buffer. But when we tried
9173 * to reset the main buffer to the original size, we
9174 * failed there too. This is very unlikely to
9175 * happen, but if it does, warn and kill all
9176 * tracing.
9177 */
9178 tracing_disabled = 1;
9179 }
9180 goto out;
9181 }
9182 out_max:
9183 #endif
9184 (*ppos)++;
9185 out:
9186 if (ret)
9187 cnt = ret;
9188 tracing_start_tr(tr);
9189 return cnt;
9190 }
9191
9192 static const struct file_operations buffer_subbuf_size_fops = {
9193 .open = tracing_open_generic_tr,
9194 .read = buffer_subbuf_size_read,
9195 .write = buffer_subbuf_size_write,
9196 .release = tracing_release_generic_tr,
9197 .llseek = default_llseek,
9198 };
9199
9200 static struct dentry *trace_instance_dir;
9201
9202 static void
9203 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9204
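/*
 * Allocate one ring buffer and its per-CPU data for a trace array. If
 * the instance was given a physical address range (boot-mapped buffer),
 * the ring buffer is created over that range and the text/data deltas
 * from the previous boot are recorded; otherwise a normal buffer is
 * allocated.
 */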
9205 static int
9206 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9207 {
9208 enum ring_buffer_flags rb_flags;
9209
9210 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9211
9212 buf->tr = tr;
9213
9214 if (tr->range_addr_start && tr->range_addr_size) {
9215 buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9216 tr->range_addr_start,
9217 tr->range_addr_size);
9218
9219 ring_buffer_last_boot_delta(buf->buffer,
9220 &tr->text_delta, &tr->data_delta);
9221 /*
9222 * This is basically the same as a mapped buffer,
9223 * with the same restrictions.
9224 */
9225 tr->mapped++;
9226 } else {
9227 buf->buffer = ring_buffer_alloc(size, rb_flags);
9228 }
9229 if (!buf->buffer)
9230 return -ENOMEM;
9231
9232 buf->data = alloc_percpu(struct trace_array_cpu);
9233 if (!buf->data) {
9234 ring_buffer_free(buf->buffer);
9235 buf->buffer = NULL;
9236 return -ENOMEM;
9237 }
9238
9239 /* Allocate the first page for all buffers */
9240 set_buffer_entries(&tr->array_buffer,
9241 ring_buffer_size(tr->array_buffer.buffer, 0));
9242
9243 return 0;
9244 }
9245
9246 static void free_trace_buffer(struct array_buffer *buf)
9247 {
9248 if (buf->buffer) {
9249 ring_buffer_free(buf->buffer);
9250 buf->buffer = NULL;
9251 free_percpu(buf->data);
9252 buf->data = NULL;
9253 }
9254 }
9255
9256 static int allocate_trace_buffers(struct trace_array *tr, int size)
9257 {
9258 int ret;
9259
9260 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9261 if (ret)
9262 return ret;
9263
9264 #ifdef CONFIG_TRACER_MAX_TRACE
9265 /* Memory mapped buffer trace arrays do not have snapshot buffers */
9266 if (tr->range_addr_start)
9267 return 0;
9268
9269 ret = allocate_trace_buffer(tr, &tr->max_buffer,
9270 allocate_snapshot ? size : 1);
9271 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9272 free_trace_buffer(&tr->array_buffer);
9273 return -ENOMEM;
9274 }
9275 tr->allocated_snapshot = allocate_snapshot;
9276
9277 allocate_snapshot = false;
9278 #endif
9279
9280 return 0;
9281 }
9282
9283 static void free_trace_buffers(struct trace_array *tr)
9284 {
9285 if (!tr)
9286 return;
9287
9288 free_trace_buffer(&tr->array_buffer);
9289
9290 #ifdef CONFIG_TRACER_MAX_TRACE
9291 free_trace_buffer(&tr->max_buffer);
9292 #endif
9293 }
9294
9295 static void init_trace_flags_index(struct trace_array *tr)
9296 {
9297 int i;
9298
9299 /* Used by the trace options files */
9300 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9301 tr->trace_flags_index[i] = i;
9302 }
9303
9304 static void __update_tracer_options(struct trace_array *tr)
9305 {
9306 struct tracer *t;
9307
9308 for (t = trace_types; t; t = t->next)
9309 add_tracer_options(tr, t);
9310 }
9311
9312 static void update_tracer_options(struct trace_array *tr)
9313 {
9314 mutex_lock(&trace_types_lock);
9315 tracer_options_updated = true;
9316 __update_tracer_options(tr);
9317 mutex_unlock(&trace_types_lock);
9318 }
9319
9320 /* Must have trace_types_lock held */
9321 struct trace_array *trace_array_find(const char *instance)
9322 {
9323 struct trace_array *tr, *found = NULL;
9324
9325 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9326 if (tr->name && strcmp(tr->name, instance) == 0) {
9327 found = tr;
9328 break;
9329 }
9330 }
9331
9332 return found;
9333 }
9334
9335 struct trace_array *trace_array_find_get(const char *instance)
9336 {
9337 struct trace_array *tr;
9338
9339 mutex_lock(&trace_types_lock);
9340 tr = trace_array_find(instance);
9341 if (tr)
9342 tr->ref++;
9343 mutex_unlock(&trace_types_lock);
9344
9345 return tr;
9346 }
9347
9348 static int trace_array_create_dir(struct trace_array *tr)
9349 {
9350 int ret;
9351
9352 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9353 if (!tr->dir)
9354 return -EINVAL;
9355
9356 ret = event_trace_add_tracer(tr->dir, tr);
9357 if (ret) {
9358 tracefs_remove(tr->dir);
9359 return ret;
9360 }
9361
9362 init_tracer_tracefs(tr, tr->dir);
9363 __update_tracer_options(tr);
9364
9365 return ret;
9366 }
9367
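/*
 * Allocate and initialize a new trace array (instance). The optional
 * "systems" string restricts which event systems get directories. If
 * the tracefs "instances" directory already exists, the instance's
 * files are created immediately; otherwise only the early event list
 * is set up. Non-zero range_addr_* arguments request a boot-mapped
 * buffer.
 */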
9368 static struct trace_array *
9369 trace_array_create_systems(const char *name, const char *systems,
9370 unsigned long range_addr_start,
9371 unsigned long range_addr_size)
9372 {
9373 struct trace_array *tr;
9374 int ret;
9375
9376 ret = -ENOMEM;
9377 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9378 if (!tr)
9379 return ERR_PTR(ret);
9380
9381 tr->name = kstrdup(name, GFP_KERNEL);
9382 if (!tr->name)
9383 goto out_free_tr;
9384
9385 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9386 goto out_free_tr;
9387
9388 if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9389 goto out_free_tr;
9390
9391 if (systems) {
9392 tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9393 if (!tr->system_names)
9394 goto out_free_tr;
9395 }
9396
9397 /* Only for boot-up memory-mapped ring buffers */
9398 tr->range_addr_start = range_addr_start;
9399 tr->range_addr_size = range_addr_size;
9400
9401 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9402
9403 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9404
9405 raw_spin_lock_init(&tr->start_lock);
9406
9407 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9408 #ifdef CONFIG_TRACER_MAX_TRACE
9409 spin_lock_init(&tr->snapshot_trigger_lock);
9410 #endif
9411 tr->current_trace = &nop_trace;
9412
9413 INIT_LIST_HEAD(&tr->systems);
9414 INIT_LIST_HEAD(&tr->events);
9415 INIT_LIST_HEAD(&tr->hist_vars);
9416 INIT_LIST_HEAD(&tr->err_log);
9417
9418 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9419 goto out_free_tr;
9420
9421 /* The ring buffer is expanded by default */
9422 trace_set_ring_buffer_expanded(tr);
9423
9424 if (ftrace_allocate_ftrace_ops(tr) < 0)
9425 goto out_free_tr;
9426
9427 ftrace_init_trace_array(tr);
9428
9429 init_trace_flags_index(tr);
9430
9431 if (trace_instance_dir) {
9432 ret = trace_array_create_dir(tr);
9433 if (ret)
9434 goto out_free_tr;
9435 } else
9436 __trace_early_add_events(tr);
9437
9438 list_add(&tr->list, &ftrace_trace_arrays);
9439
9440 tr->ref++;
9441
9442 return tr;
9443
9444 out_free_tr:
9445 ftrace_free_ftrace_ops(tr);
9446 free_trace_buffers(tr);
9447 free_cpumask_var(tr->pipe_cpumask);
9448 free_cpumask_var(tr->tracing_cpumask);
9449 kfree_const(tr->system_names);
9450 kfree(tr->name);
9451 kfree(tr);
9452
9453 return ERR_PTR(ret);
9454 }
9455
9456 static struct trace_array *trace_array_create(const char *name)
9457 {
9458 return trace_array_create_systems(name, NULL, 0, 0);
9459 }
9460
9461 static int instance_mkdir(const char *name)
9462 {
9463 struct trace_array *tr;
9464 int ret;
9465
9466 guard(mutex)(&event_mutex);
9467 guard(mutex)(&trace_types_lock);
9468
9469 ret = -EEXIST;
9470 if (trace_array_find(name))
9471 return -EEXIST;
9472
9473 tr = trace_array_create(name);
9474
9475 ret = PTR_ERR_OR_ZERO(tr);
9476
9477 return ret;
9478 }
9479
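/*
 * vmap() a physically contiguous range so it can back a boot-mapped
 * ring buffer. Returns the kernel virtual address, or 0 on failure.
 */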
9480 static u64 map_pages(u64 start, u64 size)
9481 {
9482 struct page **pages;
9483 phys_addr_t page_start;
9484 unsigned int page_count;
9485 unsigned int i;
9486 void *vaddr;
9487
9488 page_count = DIV_ROUND_UP(size, PAGE_SIZE);
9489
9490 page_start = start;
9491 pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
9492 if (!pages)
9493 return 0;
9494
9495 for (i = 0; i < page_count; i++) {
9496 phys_addr_t addr = page_start + i * PAGE_SIZE;
9497 pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
9498 }
9499 vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL);
9500 kfree(pages);
9501
9502 return (u64)(unsigned long)vaddr;
9503 }
9504
9505 /**
9506 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9507 * @name: The name of the trace array to be looked up/created.
9508 * @systems: A list of systems to create event directories for (NULL for all)
9509 *
9510 * Returns pointer to trace array with given name.
9511 * NULL, if it cannot be created.
9512 *
9513 * NOTE: This function increments the reference counter associated with the
9514 * trace array returned. This makes sure it cannot be freed while in use.
9515 * Use trace_array_put() once the trace array is no longer needed.
9516 * If the trace_array is to be freed, trace_array_destroy() needs to
9517 * be called after the trace_array_put(), or simply let user space delete
9518 * it from the tracefs instances directory. But until the
9519 * trace_array_put() is called, user space can not delete it.
9520 *
9521 */
9522 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9523 {
9524 struct trace_array *tr;
9525
9526 guard(mutex)(&event_mutex);
9527 guard(mutex)(&trace_types_lock);
9528
9529 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9530 if (tr->name && strcmp(tr->name, name) == 0) {
9531 tr->ref++;
9532 return tr;
9533 }
9534 }
9535
9536 tr = trace_array_create_systems(name, systems, 0, 0);
9537
9538 if (IS_ERR(tr))
9539 tr = NULL;
9540 else
9541 tr->ref++;
9542
9543 return tr;
9544 }
9545 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
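
/*
 * Example usage (hypothetical module code, not taken from this file):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance", NULL);
 *	if (!tr)
 *		return -ENOMEM;
 *	...
 *	trace_array_put(tr);
 */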
9546
9547 static int __remove_instance(struct trace_array *tr)
9548 {
9549 int i;
9550
9551 /* Reference counter for a newly created trace array = 1. */
9552 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9553 return -EBUSY;
9554
9555 list_del(&tr->list);
9556
9557 /* Disable all the flags that were enabled coming in */
9558 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9559 if ((1 << i) & ZEROED_TRACE_FLAGS)
9560 set_tracer_flag(tr, 1 << i, 0);
9561 }
9562
9563 if (printk_trace == tr)
9564 update_printk_trace(&global_trace);
9565
9566 tracing_set_nop(tr);
9567 clear_ftrace_function_probes(tr);
9568 event_trace_del_tracer(tr);
9569 ftrace_clear_pids(tr);
9570 ftrace_destroy_function_files(tr);
9571 tracefs_remove(tr->dir);
9572 free_percpu(tr->last_func_repeats);
9573 free_trace_buffers(tr);
9574 clear_tracing_err_log(tr);
9575
9576 for (i = 0; i < tr->nr_topts; i++) {
9577 kfree(tr->topts[i].topts);
9578 }
9579 kfree(tr->topts);
9580
9581 free_cpumask_var(tr->pipe_cpumask);
9582 free_cpumask_var(tr->tracing_cpumask);
9583 kfree_const(tr->system_names);
9584 kfree(tr->name);
9585 kfree(tr);
9586
9587 return 0;
9588 }
9589
9590 int trace_array_destroy(struct trace_array *this_tr)
9591 {
9592 struct trace_array *tr;
9593
9594 if (!this_tr)
9595 return -EINVAL;
9596
9597 guard(mutex)(&event_mutex);
9598 guard(mutex)(&trace_types_lock);
9599
9600
9601 /* Making sure trace array exists before destroying it. */
9602 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9603 if (tr == this_tr)
9604 return __remove_instance(tr);
9605 }
9606
9607 return -ENODEV;
9608 }
9609 EXPORT_SYMBOL_GPL(trace_array_destroy);
9610
9611 static int instance_rmdir(const char *name)
9612 {
9613 struct trace_array *tr;
9614
9615 guard(mutex)(&event_mutex);
9616 guard(mutex)(&trace_types_lock);
9617
9618 tr = trace_array_find(name);
9619 if (!tr)
9620 return -ENODEV;
9621
9622 return __remove_instance(tr);
9623 }
9624
9625 static __init void create_trace_instances(struct dentry *d_tracer)
9626 {
9627 struct trace_array *tr;
9628
9629 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9630 instance_mkdir,
9631 instance_rmdir);
9632 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9633 return;
9634
9635 guard(mutex)(&event_mutex);
9636 guard(mutex)(&trace_types_lock);
9637
9638 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9639 if (!tr->name)
9640 continue;
9641 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9642 "Failed to create instance directory\n"))
9643 return;
9644 }
9645 }
9646
9647 static void
9648 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9649 {
9650 int cpu;
9651
9652 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9653 tr, &show_traces_fops);
9654
9655 trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9656 tr, &set_tracer_fops);
9657
9658 trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9659 tr, &tracing_cpumask_fops);
9660
9661 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9662 tr, &tracing_iter_fops);
9663
9664 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9665 tr, &tracing_fops);
9666
9667 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9668 tr, &tracing_pipe_fops);
9669
9670 trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9671 tr, &tracing_entries_fops);
9672
9673 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9674 tr, &tracing_total_entries_fops);
9675
9676 trace_create_file("free_buffer", 0200, d_tracer,
9677 tr, &tracing_free_buffer_fops);
9678
9679 trace_create_file("trace_marker", 0220, d_tracer,
9680 tr, &tracing_mark_fops);
9681
9682 tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9683
9684 trace_create_file("trace_marker_raw", 0220, d_tracer,
9685 tr, &tracing_mark_raw_fops);
9686
9687 trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9688 &trace_clock_fops);
9689
9690 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9691 tr, &rb_simple_fops);
9692
9693 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9694 &trace_time_stamp_mode_fops);
9695
9696 tr->buffer_percent = 50;
9697
9698 trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9699 tr, &buffer_percent_fops);
9700
9701 trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9702 tr, &buffer_subbuf_size_fops);
9703
9704 create_trace_options_dir(tr);
9705
9706 #ifdef CONFIG_TRACER_MAX_TRACE
9707 trace_create_maxlat_file(tr, d_tracer);
9708 #endif
9709
9710 if (ftrace_create_function_files(tr, d_tracer))
9711 MEM_FAIL(1, "Could not allocate function filter files");
9712
9713 if (tr->range_addr_start) {
9714 trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
9715 tr, &last_boot_fops);
9716 #ifdef CONFIG_TRACER_SNAPSHOT
9717 } else {
9718 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9719 tr, &snapshot_fops);
9720 #endif
9721 }
9722
9723 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9724 tr, &tracing_err_log_fops);
9725
9726 for_each_tracing_cpu(cpu)
9727 tracing_init_tracefs_percpu(tr, cpu);
9728
9729 ftrace_init_tracefs(tr, d_tracer);
9730 }
9731
9732 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9733 {
9734 struct vfsmount *mnt;
9735 struct file_system_type *type;
9736
9737 /*
9738 * To maintain backward compatibility for tools that mount
9739 * debugfs to get to the tracing facility, tracefs is automatically
9740 * mounted to the debugfs/tracing directory.
9741 */
9742 type = get_fs_type("tracefs");
9743 if (!type)
9744 return NULL;
9745 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9746 put_filesystem(type);
9747 if (IS_ERR(mnt))
9748 return NULL;
9749 mntget(mnt);
9750
9751 return mnt;
9752 }
9753
9754 /**
9755 * tracing_init_dentry - initialize top level trace array
9756 *
9757 * This is called when creating files or directories in the tracing
9758 * directory. It is called via fs_initcall() by any of the boot up code
9759 * and expects to return the dentry of the top level tracing directory.
9760 */
9761 int tracing_init_dentry(void)
9762 {
9763 struct trace_array *tr = &global_trace;
9764
9765 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9766 pr_warn("Tracing disabled due to lockdown\n");
9767 return -EPERM;
9768 }
9769
9770 /* The top level trace array uses NULL as parent */
9771 if (tr->dir)
9772 return 0;
9773
9774 if (WARN_ON(!tracefs_initialized()))
9775 return -ENODEV;
9776
9777 /*
9778 * As there may still be users that expect the tracing
9779 * files to exist in debugfs/tracing, we must automount
9780 * the tracefs file system there, so older tools still
9781 * work with the newer kernel.
9782 */
9783 tr->dir = debugfs_create_automount("tracing", NULL,
9784 trace_automount, NULL);
9785
9786 return 0;
9787 }
9788
9789 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9790 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9791
9792 static struct workqueue_struct *eval_map_wq __initdata;
9793 static struct work_struct eval_map_work __initdata;
9794 static struct work_struct tracerfs_init_work __initdata;
9795
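/*
 * Converting the compiled-in eval (enum/sizeof) maps is not needed
 * right away at boot, so it is pushed to a workqueue here and flushed
 * by trace_eval_sync() at late_initcall_sync() time.
 */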
9796 static void __init eval_map_work_func(struct work_struct *work)
9797 {
9798 int len;
9799
9800 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9801 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9802 }
9803
9804 static int __init trace_eval_init(void)
9805 {
9806 INIT_WORK(&eval_map_work, eval_map_work_func);
9807
9808 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9809 if (!eval_map_wq) {
9810 pr_err("Unable to allocate eval_map_wq\n");
9811 /* Do work here */
9812 eval_map_work_func(&eval_map_work);
9813 return -ENOMEM;
9814 }
9815
9816 queue_work(eval_map_wq, &eval_map_work);
9817 return 0;
9818 }
9819
9820 subsys_initcall(trace_eval_init);
9821
9822 static int __init trace_eval_sync(void)
9823 {
9824 /* Make sure the eval map updates are finished */
9825 if (eval_map_wq)
9826 destroy_workqueue(eval_map_wq);
9827 return 0;
9828 }
9829
9830 late_initcall_sync(trace_eval_sync);
9831
9832
9833 #ifdef CONFIG_MODULES
9834 static void trace_module_add_evals(struct module *mod)
9835 {
9836 if (!mod->num_trace_evals)
9837 return;
9838
9839 /*
9840 * Modules with bad taint do not have events created; do
9841 * not bother with enums either.
9842 */
9843 if (trace_module_has_bad_taint(mod))
9844 return;
9845
9846 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9847 }
9848
9849 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9850 static void trace_module_remove_evals(struct module *mod)
9851 {
9852 union trace_eval_map_item *map;
9853 union trace_eval_map_item **last = &trace_eval_maps;
9854
9855 if (!mod->num_trace_evals)
9856 return;
9857
9858 guard(mutex)(&trace_eval_mutex);
9859
9860 map = trace_eval_maps;
9861
9862 while (map) {
9863 if (map->head.mod == mod)
9864 break;
9865 map = trace_eval_jmp_to_tail(map);
9866 last = &map->tail.next;
9867 map = map->tail.next;
9868 }
9869 if (!map)
9870 return;
9871
9872 *last = trace_eval_jmp_to_tail(map)->tail.next;
9873 kfree(map);
9874 }
9875 #else
9876 static inline void trace_module_remove_evals(struct module *mod) { }
9877 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9878
9879 static int trace_module_notify(struct notifier_block *self,
9880 unsigned long val, void *data)
9881 {
9882 struct module *mod = data;
9883
9884 switch (val) {
9885 case MODULE_STATE_COMING:
9886 trace_module_add_evals(mod);
9887 break;
9888 case MODULE_STATE_GOING:
9889 trace_module_remove_evals(mod);
9890 break;
9891 }
9892
9893 return NOTIFY_OK;
9894 }
9895
9896 static struct notifier_block trace_module_nb = {
9897 .notifier_call = trace_module_notify,
9898 .priority = 0,
9899 };
9900 #endif /* CONFIG_MODULES */
9901
9902 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9903 {
9904
9905 event_trace_init();
9906
9907 init_tracer_tracefs(&global_trace, NULL);
9908 ftrace_init_tracefs_toplevel(&global_trace, NULL);
9909
9910 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9911 &global_trace, &tracing_thresh_fops);
9912
9913 trace_create_file("README", TRACE_MODE_READ, NULL,
9914 NULL, &tracing_readme_fops);
9915
9916 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9917 NULL, &tracing_saved_cmdlines_fops);
9918
9919 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9920 NULL, &tracing_saved_cmdlines_size_fops);
9921
9922 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9923 NULL, &tracing_saved_tgids_fops);
9924
9925 trace_create_eval_file(NULL);
9926
9927 #ifdef CONFIG_MODULES
9928 register_module_notifier(&trace_module_nb);
9929 #endif
9930
9931 #ifdef CONFIG_DYNAMIC_FTRACE
9932 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9933 NULL, &tracing_dyn_info_fops);
9934 #endif
9935
9936 create_trace_instances(NULL);
9937
9938 update_tracer_options(&global_trace);
9939 }
9940
9941 static __init int tracer_init_tracefs(void)
9942 {
9943 int ret;
9944
9945 trace_access_lock_init();
9946
9947 ret = tracing_init_dentry();
9948 if (ret)
9949 return 0;
9950
9951 if (eval_map_wq) {
9952 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9953 queue_work(eval_map_wq, &tracerfs_init_work);
9954 } else {
9955 tracer_init_tracefs_work_func(NULL);
9956 }
9957
9958 rv_init_interface();
9959
9960 return 0;
9961 }
9962
9963 fs_initcall(tracer_init_tracefs);
9964
9965 static int trace_die_panic_handler(struct notifier_block *self,
9966 unsigned long ev, void *unused);
9967
9968 static struct notifier_block trace_panic_notifier = {
9969 .notifier_call = trace_die_panic_handler,
9970 .priority = INT_MAX - 1,
9971 };
9972
9973 static struct notifier_block trace_die_notifier = {
9974 .notifier_call = trace_die_panic_handler,
9975 .priority = INT_MAX - 1,
9976 };
9977
9978 /*
9979 * The idea is to execute the following die/panic callback early, in order
9980 * to avoid showing irrelevant information in the trace (like other panic
9981 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
9982 * warnings get disabled (to prevent potential log flooding).
9983 */
9984 static int trace_die_panic_handler(struct notifier_block *self,
9985 unsigned long ev, void *unused)
9986 {
9987 bool ftrace_check = false;
9988
9989 trace_android_vh_ftrace_oops_enter(&ftrace_check);
9990
9991 if (!ftrace_dump_on_oops_enabled() || ftrace_check)
9992 return NOTIFY_DONE;
9993
9994 /* The die notifier requires DIE_OOPS to trigger */
9995 if (self == &trace_die_notifier && ev != DIE_OOPS)
9996 return NOTIFY_DONE;
9997
9998 ftrace_dump(DUMP_PARAM);
9999
10000 trace_android_vh_ftrace_oops_exit(&ftrace_check);
10001 return NOTIFY_DONE;
10002 }
10003
10004 /*
10005 * printk is set to a max of 1024; we really don't need it that big.
10006 * Nothing should be printing 1000 characters anyway.
10007 */
10008 #define TRACE_MAX_PRINT 1000
10009
10010 /*
10011 * Define here KERN_TRACE so that we have one place to modify
10012 * it if we decide to change what log level the ftrace dump
10013 * should be at.
10014 */
10015 #define KERN_TRACE KERN_EMERG
10016
10017 void
10018 trace_printk_seq(struct trace_seq *s)
10019 {
10020 bool dump_printk = true;
10021
10022 /* Probably should print a warning here. */
10023 if (s->seq.len >= TRACE_MAX_PRINT)
10024 s->seq.len = TRACE_MAX_PRINT;
10025
10026 /*
10027 * More paranoid code. Although the buffer size is set to
10028 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10029 * an extra layer of protection.
10030 */
10031 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10032 s->seq.len = s->seq.size - 1;
10033
10034 /* should already be zero terminated, but we are paranoid. */
10035 s->buffer[s->seq.len] = 0;
10036
10037 trace_android_vh_ftrace_dump_buffer(s, &dump_printk);
10038 if (dump_printk)
10039 printk(KERN_TRACE "%s", s->buffer);
10040
10041 trace_seq_init(s);
10042 }
10043
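/*
 * Prepare an iterator over a trace array's main buffer for dumping to
 * the console: all CPUs, static temp/fmt buffers (this may run from a
 * panic or NMI context where allocation is not an option).
 */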
10044 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10045 {
10046 iter->tr = tr;
10047 iter->trace = iter->tr->current_trace;
10048 iter->cpu_file = RING_BUFFER_ALL_CPUS;
10049 iter->array_buffer = &tr->array_buffer;
10050
10051 if (iter->trace && iter->trace->open)
10052 iter->trace->open(iter);
10053
10054 /* Annotate start of buffers if we had overruns */
10055 if (ring_buffer_overruns(iter->array_buffer->buffer))
10056 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10057
10058 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10059 if (trace_clocks[iter->tr->clock_id].in_ns)
10060 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10061
10062 /* Can not use kmalloc for iter.temp and iter.fmt */
10063 iter->temp = static_temp_buf;
10064 iter->temp_size = STATIC_TEMP_BUF_SIZE;
10065 iter->fmt = static_fmt_buf;
10066 iter->fmt_size = STATIC_FMT_BUF_SIZE;
10067 }
10068
10069 void trace_init_global_iter(struct trace_iterator *iter)
10070 {
10071 trace_init_iter(iter, &global_trace);
10072 }
10073
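/*
 * Dump one trace array's ring buffer to the console. Tracing is turned
 * off first, per-CPU recording is disabled while the entries are read,
 * and each line is printed at KERN_TRACE (KERN_EMERG) level.
 */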
10074 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10075 {
10076 /* use static because iter can be a bit big for the stack */
10077 static struct trace_iterator iter;
10078 unsigned int old_userobj;
10079 unsigned long flags;
10080 int cnt = 0, cpu;
10081 bool ftrace_check = true;
10082 bool ftrace_size_check = false;
10083 unsigned long size;
10084
10085 /*
10086 * Always turn off tracing when we dump.
10087 * We don't need to show trace output of what happens
10088 * between multiple crashes.
10089 *
10090 * If the user does a sysrq-z, then they can re-enable
10091 * tracing with echo 1 > tracing_on.
10092 */
10093 tracer_tracing_off(tr);
10094
10095 local_irq_save(flags);
10096
10097 /* Simulate the iterator */
10098 trace_init_iter(&iter, tr);
10099
10100 for_each_tracing_cpu(cpu) {
10101 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10102 size = ring_buffer_size(iter.array_buffer->buffer, cpu);
10103 trace_android_vh_ftrace_size_check(size, &ftrace_size_check);
10104 }
10105
10106 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10107
10108 /* don't look at user memory in panic mode */
10109 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10110
10111 if (ftrace_size_check)
10112 goto out_enable;
10113
10114 if (dump_mode == DUMP_ORIG)
10115 iter.cpu_file = raw_smp_processor_id();
10116 else
10117 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10118
10119 if (tr == &global_trace)
10120 printk(KERN_TRACE "Dumping ftrace buffer:\n");
10121 else
10122 printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10123
10124 /* Did function tracer already get disabled? */
10125 if (ftrace_is_dead()) {
10126 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10127 printk("# MAY BE MISSING FUNCTION EVENTS\n");
10128 }
10129
10130 /*
10131 * Ftrace timestamps support two types:
10132 * - ftrace_check = 1, latency format, starting at 0 from a specific time.
10133 * - ftrace_check = 0, absolute time format, consistent with kernel time.
10134 * With this vendor hook, we can choose the format to match the requirement.
10135 */
10136 trace_android_vh_ftrace_format_check(&ftrace_check);
10137
10138 /*
10139 * We need to stop all tracing on all CPUS to read
10140 * the next buffer. This is a bit expensive, but is
10141 * not done often. We fill in all that we can read,
10142 * and then release the locks again.
10143 */
10144
10145 while (!trace_empty(&iter)) {
10146 if (!cnt)
10147 printk(KERN_TRACE "---------------------------------\n");
10148
10149 cnt++;
10150
10151 trace_iterator_reset(&iter);
10152 if (ftrace_check)
10153 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10154
10155 if (trace_find_next_entry_inc(&iter) != NULL) {
10156 int ret;
10157
10158 ret = print_trace_line(&iter);
10159 if (ret != TRACE_TYPE_NO_CONSUME)
10160 trace_consume(&iter);
10161
10162 trace_printk_seq(&iter.seq);
10163 }
10164 touch_nmi_watchdog();
10165 }
10166
10167 if (!cnt)
10168 printk(KERN_TRACE " (ftrace buffer empty)\n");
10169 else
10170 printk(KERN_TRACE "---------------------------------\n");
10171
10172 out_enable:
10173 tr->trace_flags |= old_userobj;
10174
10175 for_each_tracing_cpu(cpu) {
10176 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10177 }
10178 local_irq_restore(flags);
10179 }
10180
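/*
 * Dump according to the ftrace_dump_on_oops parameter, a comma-separated
 * list: the first token may be "0" (skip the global buffer), "1" (dump
 * all CPUs of the global buffer) or "2"/"orig_cpu" (dump only the
 * originating CPU); the remaining tokens name instances to dump,
 * optionally suffixed with "=2" or "=orig_cpu".
 */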
10181 static void ftrace_dump_by_param(void)
10182 {
10183 bool first_param = true;
10184 char dump_param[MAX_TRACER_SIZE];
10185 char *buf, *token, *inst_name;
10186 struct trace_array *tr;
10187
10188 strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10189 buf = dump_param;
10190
10191 while ((token = strsep(&buf, ",")) != NULL) {
10192 if (first_param) {
10193 first_param = false;
10194 if (!strcmp("0", token))
10195 continue;
10196 else if (!strcmp("1", token)) {
10197 ftrace_dump_one(&global_trace, DUMP_ALL);
10198 continue;
10199 }
10200 else if (!strcmp("2", token) ||
10201 !strcmp("orig_cpu", token)) {
10202 ftrace_dump_one(&global_trace, DUMP_ORIG);
10203 continue;
10204 }
10205 }
10206
10207 inst_name = strsep(&token, "=");
10208 tr = trace_array_find(inst_name);
10209 if (!tr) {
10210 printk(KERN_TRACE "Instance %s not found\n", inst_name);
10211 continue;
10212 }
10213
10214 if (token && (!strcmp("2", token) ||
10215 !strcmp("orig_cpu", token)))
10216 ftrace_dump_one(tr, DUMP_ORIG);
10217 else
10218 ftrace_dump_one(tr, DUMP_ALL);
10219 }
10220 }
10221
10222 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10223 {
10224 static atomic_t dump_running;
10225
10226 /* Only allow one dump user at a time. */
10227 if (atomic_inc_return(&dump_running) != 1) {
10228 atomic_dec(&dump_running);
10229 return;
10230 }
10231
10232 switch (oops_dump_mode) {
10233 case DUMP_ALL:
10234 ftrace_dump_one(&global_trace, DUMP_ALL);
10235 break;
10236 case DUMP_ORIG:
10237 ftrace_dump_one(&global_trace, DUMP_ORIG);
10238 break;
10239 case DUMP_PARAM:
10240 ftrace_dump_by_param();
10241 break;
10242 case DUMP_NONE:
10243 break;
10244 default:
10245 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10246 ftrace_dump_one(&global_trace, DUMP_ALL);
10247 }
10248
10249 atomic_dec(&dump_running);
10250 }
10251 EXPORT_SYMBOL_GPL(ftrace_dump);
10252
10253 #define WRITE_BUFSIZE 4096
10254
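/*
 * Generic write helper for command style tracefs files (e.g. the
 * dynamic event interfaces): copy the user buffer in WRITE_BUFSIZE
 * chunks, split it into lines, strip '#' comments, and hand each line
 * to createfn().
 */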
10255 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10256 size_t count, loff_t *ppos,
10257 int (*createfn)(const char *))
10258 {
10259 char *kbuf, *buf, *tmp;
10260 int ret = 0;
10261 size_t done = 0;
10262 size_t size;
10263
10264 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10265 if (!kbuf)
10266 return -ENOMEM;
10267
10268 while (done < count) {
10269 size = count - done;
10270
10271 if (size >= WRITE_BUFSIZE)
10272 size = WRITE_BUFSIZE - 1;
10273
10274 if (copy_from_user(kbuf, buffer + done, size)) {
10275 ret = -EFAULT;
10276 goto out;
10277 }
10278 kbuf[size] = '\0';
10279 buf = kbuf;
10280 do {
10281 tmp = strchr(buf, '\n');
10282 if (tmp) {
10283 *tmp = '\0';
10284 size = tmp - buf + 1;
10285 } else {
10286 size = strlen(buf);
10287 if (done + size < count) {
10288 if (buf != kbuf)
10289 break;
10290 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10291 pr_warn("Line length is too long: Should be less than %d\n",
10292 WRITE_BUFSIZE - 2);
10293 ret = -EINVAL;
10294 goto out;
10295 }
10296 }
10297 done += size;
10298
10299 /* Remove comments */
10300 tmp = strchr(buf, '#');
10301
10302 if (tmp)
10303 *tmp = '\0';
10304
10305 ret = createfn(buf);
10306 if (ret)
10307 goto out;
10308 buf += size;
10309
10310 } while (done < count);
10311 }
10312 ret = done;
10313
10314 out:
10315 kfree(kbuf);
10316
10317 return ret;
10318 }
10319
10320 #ifdef CONFIG_TRACER_MAX_TRACE
10321 __init static bool tr_needs_alloc_snapshot(const char *name)
10322 {
10323 char *test;
10324 int len = strlen(name);
10325 bool ret;
10326
10327 if (!boot_snapshot_index)
10328 return false;
10329
10330 if (strncmp(name, boot_snapshot_info, len) == 0 &&
10331 boot_snapshot_info[len] == '\t')
10332 return true;
10333
10334 test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10335 if (!test)
10336 return false;
10337
10338 sprintf(test, "\t%s\t", name);
10339 ret = strstr(boot_snapshot_info, test) == NULL;
10340 kfree(test);
10341 return ret;
10342 }
10343
10344 __init static void do_allocate_snapshot(const char *name)
10345 {
10346 if (!tr_needs_alloc_snapshot(name))
10347 return;
10348
10349 /*
10350 * When allocate_snapshot is set, the next call to
10351 * allocate_trace_buffers() (called by trace_array_get_by_name())
10352 * will allocate the snapshot buffer. That will also clear
10353 * this flag.
10354 */
10355 allocate_snapshot = true;
10356 }
10357 #else
10358 static inline void do_allocate_snapshot(const char *name) { }
10359 #endif
10360
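/*
 * Create the trace instances requested with the "trace_instance=" boot
 * parameter. Each entry may carry "^traceoff"/"^traceprintk" flags, an
 * optional "@address:size" or "@reserve_mem-name" region for a
 * boot-mapped buffer, and a comma-separated list of events to enable.
 */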
10361 __init static void enable_instances(void)
10362 {
10363 struct trace_array *tr;
10364 char *curr_str;
10365 char *name;
10366 char *str;
10367 char *tok;
10368
10369 /* A tab is always appended */
10370 boot_instance_info[boot_instance_index - 1] = '\0';
10371 str = boot_instance_info;
10372
10373 while ((curr_str = strsep(&str, "\t"))) {
10374 phys_addr_t start = 0;
10375 phys_addr_t size = 0;
10376 unsigned long addr = 0;
10377 bool traceprintk = false;
10378 bool traceoff = false;
10379 char *flag_delim;
10380 char *addr_delim;
10381
10382 tok = strsep(&curr_str, ",");
10383
10384 flag_delim = strchr(tok, '^');
10385 addr_delim = strchr(tok, '@');
10386
10387 if (addr_delim)
10388 *addr_delim++ = '\0';
10389
10390 if (flag_delim)
10391 *flag_delim++ = '\0';
10392
10393 name = tok;
10394
10395 if (flag_delim) {
10396 char *flag;
10397
10398 while ((flag = strsep(&flag_delim, "^"))) {
10399 if (strcmp(flag, "traceoff") == 0) {
10400 traceoff = true;
10401 } else if ((strcmp(flag, "printk") == 0) ||
10402 (strcmp(flag, "traceprintk") == 0) ||
10403 (strcmp(flag, "trace_printk") == 0)) {
10404 traceprintk = true;
10405 } else {
10406 pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10407 flag, name);
10408 }
10409 }
10410 }
10411
10412 tok = addr_delim;
10413 if (tok && isdigit(*tok)) {
10414 start = memparse(tok, &tok);
10415 if (!start) {
10416 pr_warn("Tracing: Invalid boot instance address for %s\n",
10417 name);
10418 continue;
10419 }
10420 if (*tok != ':') {
10421 pr_warn("Tracing: No size specified for instance %s\n", name);
10422 continue;
10423 }
10424 tok++;
10425 size = memparse(tok, &tok);
10426 if (!size) {
10427 pr_warn("Tracing: Invalid boot instance size for %s\n",
10428 name);
10429 continue;
10430 }
10431 } else if (tok) {
10432 if (!reserve_mem_find_by_name(tok, &start, &size)) {
10433 start = 0;
10434 pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10435 continue;
10436 }
10437 }
10438
10439 if (start) {
10440 addr = map_pages(start, size);
10441 if (addr) {
10442 pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10443 name, &start, (unsigned long)size);
10444 } else {
10445 pr_warn("Tracing: Failed to map boot instance %s\n", name);
10446 continue;
10447 }
10448 } else {
10449 /* Only non-mapped buffers have snapshot buffers */
10450 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10451 do_allocate_snapshot(name);
10452 }
10453
10454 tr = trace_array_create_systems(name, NULL, addr, size);
10455 if (IS_ERR(tr)) {
10456 pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10457 continue;
10458 }
10459
10460 if (traceoff)
10461 tracer_tracing_off(tr);
10462
10463 if (traceprintk)
10464 update_printk_trace(tr);
10465
10466 /*
10467 * If start is set, then this is a mapped buffer, and
10468 * cannot be deleted by user space, so keep the reference
10469 * to it.
10470 */
10471 if (start) {
10472 tr->flags |= TRACE_ARRAY_FL_BOOT;
10473 tr->ref++;
10474 }
10475
10476 while ((tok = strsep(&curr_str, ","))) {
10477 early_enable_events(tr, tok, true);
10478 }
10479 }
10480 }
10481
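/*
 * Early set-up of the core tracing machinery: allocate the CPU masks,
 * trace_printk buffers, the temporary event buffer and the global ring
 * buffer, register the nop tracer and the panic/die notifiers, and add
 * the global trace array to ftrace_trace_arrays.
 */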
10482 __init static int tracer_alloc_buffers(void)
10483 {
10484 int ring_buf_size;
10485 int ret = -ENOMEM;
10486
10487
10488 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10489 pr_warn("Tracing disabled due to lockdown\n");
10490 return -EPERM;
10491 }
10492
10493 /*
10494 * Make sure we don't accidentally add more trace options
10495 * than we have bits for.
10496 */
10497 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10498
10499 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10500 return -ENOMEM;
10501
10502 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10503 goto out_free_buffer_mask;
10504
10505 /* Only allocate trace_printk buffers if a trace_printk exists */
10506 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10507 /* Must be called before global_trace.buffer is allocated */
10508 trace_printk_init_buffers();
10509
10510 /* To save memory, keep the ring buffer size to its minimum */
10511 if (global_trace.ring_buffer_expanded)
10512 ring_buf_size = trace_buf_size;
10513 else
10514 ring_buf_size = 1;
10515
10516 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10517 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10518
10519 raw_spin_lock_init(&global_trace.start_lock);
10520
10521 /*
10522 * The prepare callback allocates some memory for the ring buffer. We
10523 * don't free the buffer if the CPU goes down. If we were to free
10524 * the buffer, then the user would lose any trace that was in the
10525 * buffer. The memory will be removed once the "instance" is removed.
10526 */
10527 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10528 "trace/RB:prepare", trace_rb_cpu_prepare,
10529 NULL);
10530 if (ret < 0)
10531 goto out_free_cpumask;
10532 /* Used for event triggers */
10533 ret = -ENOMEM;
10534 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10535 if (!temp_buffer)
10536 goto out_rm_hp_state;
10537
10538 if (trace_create_savedcmd() < 0)
10539 goto out_free_temp_buffer;
10540
10541 if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10542 goto out_free_savedcmd;
10543
10544 /* TODO: make the number of buffers hot pluggable with CPUS */
10545 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10546 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10547 goto out_free_pipe_cpumask;
10548 }
10549 if (global_trace.buffer_disabled)
10550 tracing_off();
10551
10552 if (trace_boot_clock) {
10553 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10554 if (ret < 0)
10555 pr_warn("Trace clock %s not defined, going back to default\n",
10556 trace_boot_clock);
10557 }
10558
10559 /*
10560 * register_tracer() might reference current_trace, so it
10561 * needs to be set before we register anything. This is
10562 * just a bootstrap of current_trace anyway.
10563 */
10564 global_trace.current_trace = &nop_trace;
10565
10566 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10567 #ifdef CONFIG_TRACER_MAX_TRACE
10568 spin_lock_init(&global_trace.snapshot_trigger_lock);
10569 #endif
10570 ftrace_init_global_array_ops(&global_trace);
10571
10572 init_trace_flags_index(&global_trace);
10573
10574 register_tracer(&nop_trace);
10575
10576 /* Function tracing may start here (via kernel command line) */
10577 init_function_trace();
10578
10579 /* All seems OK, enable tracing */
10580 tracing_disabled = 0;
10581
10582 atomic_notifier_chain_register(&panic_notifier_list,
10583 &trace_panic_notifier);
10584
10585 register_die_notifier(&trace_die_notifier);
10586
10587 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10588
10589 INIT_LIST_HEAD(&global_trace.systems);
10590 INIT_LIST_HEAD(&global_trace.events);
10591 INIT_LIST_HEAD(&global_trace.hist_vars);
10592 INIT_LIST_HEAD(&global_trace.err_log);
10593 list_add(&global_trace.list, &ftrace_trace_arrays);
10594
10595 apply_trace_boot_options();
10596
10597 register_snapshot_cmd();
10598
10599 return 0;
10600
10601 out_free_pipe_cpumask:
10602 free_cpumask_var(global_trace.pipe_cpumask);
10603 out_free_savedcmd:
10604 trace_free_saved_cmdlines_buffer();
10605 out_free_temp_buffer:
10606 ring_buffer_free(temp_buffer);
10607 out_rm_hp_state:
10608 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10609 out_free_cpumask:
10610 free_cpumask_var(global_trace.tracing_cpumask);
10611 out_free_buffer_mask:
10612 free_cpumask_var(tracing_buffer_mask);
10613 return ret;
10614 }
10615
10616 void __init ftrace_boot_snapshot(void)
10617 {
10618 #ifdef CONFIG_TRACER_MAX_TRACE
10619 struct trace_array *tr;
10620
10621 if (!snapshot_at_boot)
10622 return;
10623
10624 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10625 if (!tr->allocated_snapshot)
10626 continue;
10627
10628 tracing_snapshot_instance(tr);
10629 trace_array_puts(tr, "** Boot snapshot taken **\n");
10630 }
10631 #endif
10632 }
10633
10634 void __init early_trace_init(void)
10635 {
10636 if (tracepoint_printk) {
10637 tracepoint_print_iter =
10638 kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10639 if (MEM_FAIL(!tracepoint_print_iter,
10640 "Failed to allocate trace iterator\n"))
10641 tracepoint_printk = 0;
10642 else
10643 static_key_enable(&tracepoint_printk_key.key);
10644 }
10645 tracer_alloc_buffers();
10646
10647 init_events();
10648 }
10649
10650 void __init trace_init(void)
10651 {
10652 trace_event_init();
10653
10654 if (boot_instance_index)
10655 enable_instances();
10656 }
10657
10658 __init static void clear_boot_tracer(void)
10659 {
10660 /*
10661 * The default bootup tracer name is stored in an init section buffer.
10662 * This function is called from a late initcall. If we did not
10663 * find the boot tracer, then clear it out, to prevent
10664 * later registration from accessing the buffer that is
10665 * about to be freed.
10666 */
10667 if (!default_bootup_tracer)
10668 return;
10669
10670 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10671 default_bootup_tracer);
10672 default_bootup_tracer = NULL;
10673 }
10674
10675 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10676 __init static void tracing_set_default_clock(void)
10677 {
10678 /* sched_clock_stable() is determined in late_initcall */
10679 if (!trace_boot_clock && !sched_clock_stable()) {
10680 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10681 pr_warn("Can not set tracing clock due to lockdown\n");
10682 return;
10683 }
10684
10685 printk(KERN_WARNING
10686 "Unstable clock detected, switching default tracing clock to \"global\"\n"
10687 "If you want to keep using the local clock, then add:\n"
10688 " \"trace_clock=local\"\n"
10689 "on the kernel command line\n");
10690 tracing_set_clock(&global_trace, "global");
10691 }
10692 }
10693 #else
10694 static inline void tracing_set_default_clock(void) { }
10695 #endif
10696
10697 __init static int late_trace_init(void)
10698 {
10699 if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10700 static_key_disable(&tracepoint_printk_key.key);
10701 tracepoint_printk = 0;
10702 }
10703
10704 tracing_set_default_clock();
10705 clear_boot_tracer();
10706 return 0;
10707 }
10708
10709 late_initcall_sync(late_trace_init);
10710