1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/kmemleak.h>
44 #include <linux/poll.h>
45 #include <linux/nmi.h>
46 #include <linux/fs.h>
47 #include <linux/trace.h>
48 #include <linux/sched/clock.h>
49 #include <linux/sched/rt.h>
50 #include <linux/fsnotify.h>
51 #include <linux/irq_work.h>
52 #include <linux/workqueue.h>
53 #include <trace/hooks/ftrace_dump.h>
54
55 #include "trace.h"
56 #include "trace_output.h"
57
58 /*
59 * On boot up, the ring buffer is set to the minimum size, so that
60 * we do not waste memory on systems that are not using tracing.
61 */
62 bool ring_buffer_expanded;
63
64 /*
65 * We need to change this state when a selftest is running.
66 * A selftest will poke into the ring buffer to count the
67 * entries inserted during the selftest, although some concurrent
68 * insertions into the ring buffer, such as trace_printk(), could occur
69 * at the same time, giving false positive or negative results.
70 */
71 static bool __read_mostly tracing_selftest_running;
72
73 /*
74 * If boot-time tracing including tracers/events via kernel cmdline
75 * is running, we do not want to run SELFTEST.
76 */
77 bool __read_mostly tracing_selftest_disabled;
78
79 #ifdef CONFIG_FTRACE_STARTUP_TEST
80 void __init disable_tracing_selftest(const char *reason)
81 {
82 if (!tracing_selftest_disabled) {
83 tracing_selftest_disabled = true;
84 pr_info("Ftrace startup test is disabled due to %s\n", reason);
85 }
86 }
87 #endif
88
89 /* Pipe tracepoints to printk */
90 struct trace_iterator *tracepoint_print_iter;
91 int tracepoint_printk;
92 static bool tracepoint_printk_stop_on_boot __initdata;
93 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
94
95 /* For tracers that don't implement custom flags */
96 static struct tracer_opt dummy_tracer_opt[] = {
97 { }
98 };
99
100 static int
101 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
102 {
103 return 0;
104 }
105
106 /*
107 * To prevent the comm cache from being overwritten when no
108 * tracing is active, only save the comm when a trace event
109 * occurred.
110 */
111 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
112
113 /*
114 * Kill all tracing for good (never come back).
115 * It is initialized to 1 but will turn to zero if the initialization
116 * of the tracer is successful. But that is the only place that sets
117 * this back to zero.
118 */
119 static int tracing_disabled = 1;
120
121 cpumask_var_t __read_mostly tracing_buffer_mask;
122
123 /*
124 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
125 *
126 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
127 * is set, then ftrace_dump is called. This will output the contents
128 * of the ftrace buffers to the console. This is very useful for
129 * capturing traces that lead to crashes and outputting them to a
130 * serial console.
131 *
132 * It is off by default, but you can enable it either by specifying
133 * "ftrace_dump_on_oops" on the kernel command line, or by setting
134 * /proc/sys/kernel/ftrace_dump_on_oops
135 * Set to 1 if you want to dump the buffers of all CPUs
136 * Set to 2 if you want to dump only the buffer of the CPU that triggered the oops
137 */
138
139 enum ftrace_dump_mode ftrace_dump_on_oops;
140
141 /* When set, tracing will stop when a WARN*() is hit */
142 int __disable_trace_on_warning;
143
144 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
145 /* Map of enums to their values, for "eval_map" file */
146 struct trace_eval_map_head {
147 struct module *mod;
148 unsigned long length;
149 };
150
151 union trace_eval_map_item;
152
153 struct trace_eval_map_tail {
154 /*
155 * "end" is first and points to NULL as it must be different
156 * than "mod" or "eval_string"
157 */
158 union trace_eval_map_item *next;
159 const char *end; /* points to NULL */
160 };
161
162 static DEFINE_MUTEX(trace_eval_mutex);
163
164 /*
165 * The trace_eval_maps are saved in an array with two extra elements,
166 * one at the beginning, and one at the end. The beginning item contains
167 * the count of the saved maps (head.length), and the module they
168 * belong to if not built in (head.mod). The ending item contains a
169 * pointer to the next array of saved eval_map items.
170 */
171 union trace_eval_map_item {
172 struct trace_eval_map map;
173 struct trace_eval_map_head head;
174 struct trace_eval_map_tail tail;
175 };
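/*
 * Illustrative layout of one saved array (a sketch drawn from the comment
 * above, not a dump of real data):
 *
 *   [0]        head:  .length = N, .mod = owning module (NULL if built in)
 *   [1]..[N]   map:   the N saved trace_eval_map entries
 *   [N+1]      tail:  .next = pointer to the next saved array, or NULL
 */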
176
177 static union trace_eval_map_item *trace_eval_maps;
178 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
179
180 int tracing_set_tracer(struct trace_array *tr, const char *buf);
181 static void ftrace_trace_userstack(struct trace_array *tr,
182 struct trace_buffer *buffer,
183 unsigned int trace_ctx);
184
185 #define MAX_TRACER_SIZE 100
186 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
187 static char *default_bootup_tracer;
188
189 static bool allocate_snapshot;
190
191 static int __init set_cmdline_ftrace(char *str)
192 {
193 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
194 default_bootup_tracer = bootup_tracer_buf;
195 /* We are using ftrace early, expand it */
196 ring_buffer_expanded = true;
197 return 1;
198 }
199 __setup("ftrace=", set_cmdline_ftrace);
200
201 static int __init set_ftrace_dump_on_oops(char *str)
202 {
203 if (*str++ != '=' || !*str || !strcmp("1", str)) {
204 ftrace_dump_on_oops = DUMP_ALL;
205 return 1;
206 }
207
208 if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
209 ftrace_dump_on_oops = DUMP_ORIG;
210 return 1;
211 }
212
213 return 0;
214 }
215 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
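/*
 * Example (illustrative, matching the parser above): the knob can be set
 * on the kernel command line or at run time. "1" (or a bare
 * "ftrace_dump_on_oops") selects DUMP_ALL; "orig_cpu" or "2" selects
 * DUMP_ORIG.
 *
 *	ftrace_dump_on_oops=orig_cpu
 *
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */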
216
217 static int __init stop_trace_on_warning(char *str)
218 {
219 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
220 __disable_trace_on_warning = 1;
221 return 1;
222 }
223 __setup("traceoff_on_warning", stop_trace_on_warning);
224
225 static int __init boot_alloc_snapshot(char *str)
226 {
227 allocate_snapshot = true;
228 /* We also need the main ring buffer expanded */
229 ring_buffer_expanded = true;
230 return 1;
231 }
232 __setup("alloc_snapshot", boot_alloc_snapshot);
233
234
235 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
236
237 static int __init set_trace_boot_options(char *str)
238 {
239 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
240 return 1;
241 }
242 __setup("trace_options=", set_trace_boot_options);
243
244 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
245 static char *trace_boot_clock __initdata;
246
247 static int __init set_trace_boot_clock(char *str)
248 {
249 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
250 trace_boot_clock = trace_boot_clock_buf;
251 return 1;
252 }
253 __setup("trace_clock=", set_trace_boot_clock);
254
255 static int __init set_tracepoint_printk(char *str)
256 {
257 /* Ignore the "tp_printk_stop_on_boot" param */
258 if (*str == '_')
259 return 0;
260
261 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
262 tracepoint_printk = 1;
263 return 1;
264 }
265 __setup("tp_printk", set_tracepoint_printk);
266
267 static int __init set_tracepoint_printk_stop(char *str)
268 {
269 tracepoint_printk_stop_on_boot = true;
270 return 1;
271 }
272 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
273
274 unsigned long long ns2usecs(u64 nsec)
275 {
276 nsec += 500;
277 do_div(nsec, 1000);
278 return nsec;
279 }
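/*
 * ns2usecs() rounds to the nearest microsecond: for example,
 * ns2usecs(1499) == 1 and ns2usecs(1500) == 2, because 500 is added
 * before the divide by 1000.
 */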
280
281 static void
282 trace_process_export(struct trace_export *export,
283 struct ring_buffer_event *event, int flag)
284 {
285 struct trace_entry *entry;
286 unsigned int size = 0;
287
288 if (export->flags & flag) {
289 entry = ring_buffer_event_data(event);
290 size = ring_buffer_event_length(event);
291 export->write(export, entry, size);
292 }
293 }
294
295 static DEFINE_MUTEX(ftrace_export_lock);
296
297 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
298
299 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
300 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
301 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
302
303 static inline void ftrace_exports_enable(struct trace_export *export)
304 {
305 if (export->flags & TRACE_EXPORT_FUNCTION)
306 static_branch_inc(&trace_function_exports_enabled);
307
308 if (export->flags & TRACE_EXPORT_EVENT)
309 static_branch_inc(&trace_event_exports_enabled);
310
311 if (export->flags & TRACE_EXPORT_MARKER)
312 static_branch_inc(&trace_marker_exports_enabled);
313 }
314
315 static inline void ftrace_exports_disable(struct trace_export *export)
316 {
317 if (export->flags & TRACE_EXPORT_FUNCTION)
318 static_branch_dec(&trace_function_exports_enabled);
319
320 if (export->flags & TRACE_EXPORT_EVENT)
321 static_branch_dec(&trace_event_exports_enabled);
322
323 if (export->flags & TRACE_EXPORT_MARKER)
324 static_branch_dec(&trace_marker_exports_enabled);
325 }
326
327 static void ftrace_exports(struct ring_buffer_event *event, int flag)
328 {
329 struct trace_export *export;
330
331 preempt_disable_notrace();
332
333 export = rcu_dereference_raw_check(ftrace_exports_list);
334 while (export) {
335 trace_process_export(export, event, flag);
336 export = rcu_dereference_raw_check(export->next);
337 }
338
339 preempt_enable_notrace();
340 }
341
342 static inline void
343 add_trace_export(struct trace_export **list, struct trace_export *export)
344 {
345 rcu_assign_pointer(export->next, *list);
346 /*
347 * We are adding this export to the list, but another
348 * CPU might be walking that list. We need to make sure
349 * the export->next pointer is valid before another CPU sees
350 * the export pointer included in the list.
351 */
352 rcu_assign_pointer(*list, export);
353 }
354
355 static inline int
356 rm_trace_export(struct trace_export **list, struct trace_export *export)
357 {
358 struct trace_export **p;
359
360 for (p = list; *p != NULL; p = &(*p)->next)
361 if (*p == export)
362 break;
363
364 if (*p != export)
365 return -1;
366
367 rcu_assign_pointer(*p, (*p)->next);
368
369 return 0;
370 }
371
372 static inline void
373 add_ftrace_export(struct trace_export **list, struct trace_export *export)
374 {
375 ftrace_exports_enable(export);
376
377 add_trace_export(list, export);
378 }
379
380 static inline int
381 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
382 {
383 int ret;
384
385 ret = rm_trace_export(list, export);
386 ftrace_exports_disable(export);
387
388 return ret;
389 }
390
391 int register_ftrace_export(struct trace_export *export)
392 {
393 if (WARN_ON_ONCE(!export->write))
394 return -1;
395
396 mutex_lock(&ftrace_export_lock);
397
398 add_ftrace_export(&ftrace_exports_list, export);
399
400 mutex_unlock(&ftrace_export_lock);
401
402 return 0;
403 }
404 EXPORT_SYMBOL_GPL(register_ftrace_export);
405
406 int unregister_ftrace_export(struct trace_export *export)
407 {
408 int ret;
409
410 mutex_lock(&ftrace_export_lock);
411
412 ret = rm_ftrace_export(&ftrace_exports_list, export);
413
414 mutex_unlock(&ftrace_export_lock);
415
416 return ret;
417 }
418 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
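/*
 * Minimal usage sketch for the export API above (illustrative only; the
 * authoritative struct trace_export definition and write() prototype live
 * in include/linux/trace.h):
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		... forward the raw trace entry to an out-of-band channel ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */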
419
420 /* trace_flags holds trace_options default values */
421 #define TRACE_DEFAULT_FLAGS \
422 (FUNCTION_DEFAULT_FLAGS | \
423 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
424 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
425 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
426 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
427 TRACE_ITER_HASH_PTR)
428
429 /* trace_options that are only supported by global_trace */
430 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
431 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
432
433 /* trace_flags that are default zero for instances */
434 #define ZEROED_TRACE_FLAGS \
435 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
436
437 /*
438 * The global_trace is the descriptor that holds the top-level tracing
439 * buffers for the live tracing.
440 */
441 static struct trace_array global_trace = {
442 .trace_flags = TRACE_DEFAULT_FLAGS,
443 };
444
445 LIST_HEAD(ftrace_trace_arrays);
446
447 int trace_array_get(struct trace_array *this_tr)
448 {
449 struct trace_array *tr;
450 int ret = -ENODEV;
451
452 mutex_lock(&trace_types_lock);
453 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
454 if (tr == this_tr) {
455 tr->ref++;
456 ret = 0;
457 break;
458 }
459 }
460 mutex_unlock(&trace_types_lock);
461
462 return ret;
463 }
464
465 static void __trace_array_put(struct trace_array *this_tr)
466 {
467 WARN_ON(!this_tr->ref);
468 this_tr->ref--;
469 }
470
471 /**
472 * trace_array_put - Decrement the reference counter for this trace array.
473 * @this_tr : pointer to the trace array
474 *
475 * NOTE: Use this when we no longer need the trace array returned by
476 * trace_array_get_by_name(). This ensures the trace array can be later
477 * destroyed.
478 *
479 */
480 void trace_array_put(struct trace_array *this_tr)
481 {
482 if (!this_tr)
483 return;
484
485 mutex_lock(&trace_types_lock);
486 __trace_array_put(this_tr);
487 mutex_unlock(&trace_types_lock);
488 }
489 EXPORT_SYMBOL_GPL(trace_array_put);
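/*
 * Typical refcounting pattern for instance users (a sketch; the instance
 * name is made up). trace_array_get_by_name(), defined later in this file,
 * already takes the reference that trace_array_put() drops:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENODEV;
 *	... use the instance (enable events on it, write to it, ...) ...
 *	trace_array_put(tr);
 */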
490
491 int tracing_check_open_get_tr(struct trace_array *tr)
492 {
493 int ret;
494
495 ret = security_locked_down(LOCKDOWN_TRACEFS);
496 if (ret)
497 return ret;
498
499 if (tracing_disabled)
500 return -ENODEV;
501
502 if (tr && trace_array_get(tr) < 0)
503 return -ENODEV;
504
505 return 0;
506 }
507
508 int call_filter_check_discard(struct trace_event_call *call, void *rec,
509 struct trace_buffer *buffer,
510 struct ring_buffer_event *event)
511 {
512 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
513 !filter_match_preds(call->filter, rec)) {
514 __trace_event_discard_commit(buffer, event);
515 return 1;
516 }
517
518 return 0;
519 }
520
521 /**
522 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
523 * @filtered_pids: The list of pids to check
524 * @search_pid: The PID to find in @filtered_pids
525 *
526 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
527 */
528 bool
529 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
530 {
531 return trace_pid_list_is_set(filtered_pids, search_pid);
532 }
533
534 /**
535 * trace_ignore_this_task - should a task be ignored for tracing
536 * @filtered_pids: The list of pids to check
537 * @filtered_no_pids: The list of pids not to be traced
538 * @task: The task that should be ignored if not filtered
539 *
540 * Checks if @task should be traced or not from @filtered_pids.
541 * Returns true if @task should *NOT* be traced.
542 * Returns false if @task should be traced.
543 */
544 bool
545 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
546 struct trace_pid_list *filtered_no_pids,
547 struct task_struct *task)
548 {
549 /*
550 * If filtered_no_pids is not empty, and the task's pid is listed
551 * in filtered_no_pids, then return true.
552 * Otherwise, if filtered_pids is empty, that means we can
553 * trace all tasks. If it has content, then only trace pids
554 * within filtered_pids.
555 */
556
557 return (filtered_pids &&
558 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
559 (filtered_no_pids &&
560 trace_find_filtered_pid(filtered_no_pids, task->pid));
561 }
562
563 /**
564 * trace_filter_add_remove_task - Add or remove a task from a pid_list
565 * @pid_list: The list to modify
566 * @self: The current task for fork or NULL for exit
567 * @task: The task to add or remove
568 *
569 * If adding a task, if @self is defined, the task is only added if @self
570 * is also included in @pid_list. This happens on fork and tasks should
571 * only be added when the parent is listed. If @self is NULL, then the
572 * @task pid will be removed from the list, which would happen on exit
573 * of a task.
574 */
575 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
576 struct task_struct *self,
577 struct task_struct *task)
578 {
579 if (!pid_list)
580 return;
581
582 /* For forks, we only add if the forking task is listed */
583 if (self) {
584 if (!trace_find_filtered_pid(pid_list, self->pid))
585 return;
586 }
587
588 /* "self" is set for forks, and NULL for exits */
589 if (self)
590 trace_pid_list_set(pid_list, task->pid);
591 else
592 trace_pid_list_clear(pid_list, task->pid);
593 }
594
595 /**
596 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
597 * @pid_list: The pid list to show
598 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
599 * @pos: The position of the file
600 *
601 * This is used by the seq_file "next" operation to iterate the pids
602 * listed in a trace_pid_list structure.
603 *
604 * Returns the pid+1 as we want to display pid of zero, but NULL would
605 * stop the iteration.
606 */
607 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
608 {
609 long pid = (unsigned long)v;
610 unsigned int next;
611
612 (*pos)++;
613
614 /* pid already is +1 of the actual previous bit */
615 if (trace_pid_list_next(pid_list, pid, &next) < 0)
616 return NULL;
617
618 pid = next;
619
620 /* Return pid + 1 to allow zero to be represented */
621 return (void *)(pid + 1);
622 }
623
624 /**
625 * trace_pid_start - Used for seq_file to start reading pid lists
626 * @pid_list: The pid list to show
627 * @pos: The position of the file
628 *
629 * This is used by seq_file "start" operation to start the iteration
630 * of listing pids.
631 *
632 * Returns the pid+1 as we want to display pid of zero, but NULL would
633 * stop the iteration.
634 */
635 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
636 {
637 unsigned long pid;
638 unsigned int first;
639 loff_t l = 0;
640
641 if (trace_pid_list_first(pid_list, &first) < 0)
642 return NULL;
643
644 pid = first;
645
646 /* Return pid + 1 so that zero can be the exit value */
647 for (pid++; pid && l < *pos;
648 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
649 ;
650 return (void *)pid;
651 }
652
653 /**
654 * trace_pid_show - show the current pid in seq_file processing
655 * @m: The seq_file structure to write into
656 * @v: A void pointer of the pid (+1) value to display
657 *
658 * Can be directly used by seq_file operations to display the current
659 * pid value.
660 */
661 int trace_pid_show(struct seq_file *m, void *v)
662 {
663 unsigned long pid = (unsigned long)v - 1;
664
665 seq_printf(m, "%lu\n", pid);
666 return 0;
667 }
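/*
 * The three helpers above are meant to be wired into a seq_file that walks
 * a trace_pid_list. A sketch (where the pid_list comes from is up to the
 * caller; the real readers in this file fetch it under RCU):
 *
 *	static void *my_pids_start(struct seq_file *m, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;
 *
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *my_pids_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;
 *
 *		return trace_pid_next(pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations my_pids_sops = {
 *		.start	= my_pids_start,
 *		.next	= my_pids_next,
 *		.stop	= my_pids_stop,		... undoes whatever start took ...
 *		.show	= trace_pid_show,
 *	};
 */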
668
669 /* 128 should be much more than enough */
670 #define PID_BUF_SIZE 127
671
672 int trace_pid_write(struct trace_pid_list *filtered_pids,
673 struct trace_pid_list **new_pid_list,
674 const char __user *ubuf, size_t cnt)
675 {
676 struct trace_pid_list *pid_list;
677 struct trace_parser parser;
678 unsigned long val;
679 int nr_pids = 0;
680 ssize_t read = 0;
681 ssize_t ret;
682 loff_t pos;
683 pid_t pid;
684
685 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
686 return -ENOMEM;
687
688 /*
689 * Always create a new list. The write is an all-or-nothing
690 * operation: a new list is built whenever the user adds pids,
691 * and if the operation fails, the current list is
692 * not modified.
693 */
694 pid_list = trace_pid_list_alloc();
695 if (!pid_list) {
696 trace_parser_put(&parser);
697 return -ENOMEM;
698 }
699
700 if (filtered_pids) {
701 /* copy the current bits to the new max */
702 ret = trace_pid_list_first(filtered_pids, &pid);
703 while (!ret) {
704 trace_pid_list_set(pid_list, pid);
705 ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
706 nr_pids++;
707 }
708 }
709
710 ret = 0;
711 while (cnt > 0) {
712
713 pos = 0;
714
715 ret = trace_get_user(&parser, ubuf, cnt, &pos);
716 if (ret < 0)
717 break;
718
719 read += ret;
720 ubuf += ret;
721 cnt -= ret;
722
723 if (!trace_parser_loaded(&parser))
724 break;
725
726 ret = -EINVAL;
727 if (kstrtoul(parser.buffer, 0, &val))
728 break;
729
730 pid = (pid_t)val;
731
732 if (trace_pid_list_set(pid_list, pid) < 0) {
733 ret = -1;
734 break;
735 }
736 nr_pids++;
737
738 trace_parser_clear(&parser);
739 ret = 0;
740 }
741 trace_parser_put(&parser);
742
743 if (ret < 0) {
744 trace_pid_list_free(pid_list);
745 return ret;
746 }
747
748 if (!nr_pids) {
749 /* Cleared the list of pids */
750 trace_pid_list_free(pid_list);
751 pid_list = NULL;
752 }
753
754 *new_pid_list = pid_list;
755
756 return read;
757 }
758
759 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
760 {
761 u64 ts;
762
763 /* Early boot up does not have a buffer yet */
764 if (!buf->buffer)
765 return trace_clock_local();
766
767 ts = ring_buffer_time_stamp(buf->buffer);
768 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
769
770 return ts;
771 }
772
773 u64 ftrace_now(int cpu)
774 {
775 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
776 }
777
778 /**
779 * tracing_is_enabled - Show if global_trace has been enabled
780 *
781 * Shows if the global trace has been enabled or not. It uses the
782 * mirror flag "buffer_disabled" to be used in fast paths such as for
783 * the irqsoff tracer. But it may be inaccurate due to races. If you
784 * need to know the accurate state, use tracing_is_on() which is a little
785 * slower, but accurate.
786 */
787 int tracing_is_enabled(void)
788 {
789 /*
790 * For quick access (irqsoff uses this in fast path), just
791 * return the mirror variable of the state of the ring buffer.
792 * It's a little racy, but we don't really care.
793 */
794 smp_rmb();
795 return !global_trace.buffer_disabled;
796 }
797
798 /*
799 * trace_buf_size is the size in bytes that is allocated
800 * for a buffer. Note, the number of bytes is always rounded
801 * to page size.
802 *
803 * This number is purposely set to a low value of 16384 entries.
804 * If a dump on oops happens, it is much appreciated
805 * not to have to wait for all that output. Anyway, this is
806 * configurable at both boot time and run time.
807 */
808 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
809
810 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
811
812 /* trace_types holds a link list of available tracers. */
813 static struct tracer *trace_types __read_mostly;
814
815 /*
816 * trace_types_lock is used to protect the trace_types list.
817 */
818 DEFINE_MUTEX(trace_types_lock);
819
820 /*
821 * Serialize access to the ring buffer.
822 *
823 * The ring buffer serializes readers, but that is only low-level protection.
824 * The validity of the events (returned by ring_buffer_peek() etc.)
825 * is not protected by the ring buffer.
826 *
827 * The content of events may become garbage if we allow other processes to
828 * consume these events concurrently:
829 * A) the page of the consumed events may become a normal page
830 * (not a reader page) in the ring buffer, and this page will be rewritten
831 * by the event producer.
832 * B) the page of the consumed events may become a page for splice_read,
833 * and this page will be returned to the system.
834 *
835 * These primitives allow multiple processes to access different CPU ring
836 * buffers concurrently.
837 *
838 * These primitives don't distinguish read-only and read-consume access.
839 * Multiple read-only accesses are also serialized.
840 */
841
842 #ifdef CONFIG_SMP
843 static DECLARE_RWSEM(all_cpu_access_lock);
844 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
845
846 static inline void trace_access_lock(int cpu)
847 {
848 if (cpu == RING_BUFFER_ALL_CPUS) {
849 /* gain it for accessing the whole ring buffer. */
850 down_write(&all_cpu_access_lock);
851 } else {
852 /* gain it for accessing a cpu ring buffer. */
853
854 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
855 down_read(&all_cpu_access_lock);
856
857 /* Secondly block other access to this @cpu ring buffer. */
858 mutex_lock(&per_cpu(cpu_access_lock, cpu));
859 }
860 }
861
862 static inline void trace_access_unlock(int cpu)
863 {
864 if (cpu == RING_BUFFER_ALL_CPUS) {
865 up_write(&all_cpu_access_lock);
866 } else {
867 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
868 up_read(&all_cpu_access_lock);
869 }
870 }
871
872 static inline void trace_access_lock_init(void)
873 {
874 int cpu;
875
876 for_each_possible_cpu(cpu)
877 mutex_init(&per_cpu(cpu_access_lock, cpu));
878 }
879
880 #else
881
882 static DEFINE_MUTEX(access_lock);
883
884 static inline void trace_access_lock(int cpu)
885 {
886 (void)cpu;
887 mutex_lock(&access_lock);
888 }
889
890 static inline void trace_access_unlock(int cpu)
891 {
892 (void)cpu;
893 mutex_unlock(&access_lock);
894 }
895
896 static inline void trace_access_lock_init(void)
897 {
898 }
899
900 #endif
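/*
 * Reader-side usage sketch (this is the pattern the pipe and splice readers
 * later in this file follow): take the lock for the CPU being consumed, or
 * pass RING_BUFFER_ALL_CPUS when consuming every CPU buffer.
 *
 *	trace_access_lock(iter->cpu_file);
 *	... peek at or consume events for that cpu ...
 *	trace_access_unlock(iter->cpu_file);
 */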
901
902 #ifdef CONFIG_STACKTRACE
903 static void __ftrace_trace_stack(struct trace_buffer *buffer,
904 unsigned int trace_ctx,
905 int skip, struct pt_regs *regs);
906 static inline void ftrace_trace_stack(struct trace_array *tr,
907 struct trace_buffer *buffer,
908 unsigned int trace_ctx,
909 int skip, struct pt_regs *regs);
910
911 #else
912 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
913 unsigned int trace_ctx,
914 int skip, struct pt_regs *regs)
915 {
916 }
917 static inline void ftrace_trace_stack(struct trace_array *tr,
918 struct trace_buffer *buffer,
919 unsigned long trace_ctx,
920 int skip, struct pt_regs *regs)
921 {
922 }
923
924 #endif
925
926 static __always_inline void
927 trace_event_setup(struct ring_buffer_event *event,
928 int type, unsigned int trace_ctx)
929 {
930 struct trace_entry *ent = ring_buffer_event_data(event);
931
932 tracing_generic_entry_update(ent, type, trace_ctx);
933 }
934
935 static __always_inline struct ring_buffer_event *
936 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
937 int type,
938 unsigned long len,
939 unsigned int trace_ctx)
940 {
941 struct ring_buffer_event *event;
942
943 event = ring_buffer_lock_reserve(buffer, len);
944 if (event != NULL)
945 trace_event_setup(event, type, trace_ctx);
946
947 return event;
948 }
949
950 void tracer_tracing_on(struct trace_array *tr)
951 {
952 if (tr->array_buffer.buffer)
953 ring_buffer_record_on(tr->array_buffer.buffer);
954 /*
955 * This flag is looked at when buffers haven't been allocated
956 * yet, or by some tracers (like irqsoff), that just want to
957 * know if the ring buffer has been disabled, but it can handle
958 * races of where it gets disabled but we still do a record.
959 * As the check is in the fast path of the tracers, it is more
960 * important to be fast than accurate.
961 */
962 tr->buffer_disabled = 0;
963 /* Make the flag seen by readers */
964 smp_wmb();
965 }
966
967 /**
968 * tracing_on - enable tracing buffers
969 *
970 * This function enables tracing buffers that may have been
971 * disabled with tracing_off.
972 */
973 void tracing_on(void)
974 {
975 tracer_tracing_on(&global_trace);
976 }
977 EXPORT_SYMBOL_GPL(tracing_on);
978
979
980 static __always_inline void
981 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
982 {
983 __this_cpu_write(trace_taskinfo_save, true);
984
985 /* If this is the temp buffer, we need to commit fully */
986 if (this_cpu_read(trace_buffered_event) == event) {
987 /* Length is in event->array[0] */
988 ring_buffer_write(buffer, event->array[0], &event->array[1]);
989 /* Release the temp buffer */
990 this_cpu_dec(trace_buffered_event_cnt);
991 } else
992 ring_buffer_unlock_commit(buffer, event);
993 }
994
995 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
996 const char *str, int size)
997 {
998 struct ring_buffer_event *event;
999 struct trace_buffer *buffer;
1000 struct print_entry *entry;
1001 unsigned int trace_ctx;
1002 int alloc;
1003
1004 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1005 return 0;
1006
1007 if (unlikely(tracing_selftest_running || tracing_disabled))
1008 return 0;
1009
1010 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1011
1012 trace_ctx = tracing_gen_ctx();
1013 buffer = tr->array_buffer.buffer;
1014 ring_buffer_nest_start(buffer);
1015 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1016 trace_ctx);
1017 if (!event) {
1018 size = 0;
1019 goto out;
1020 }
1021
1022 entry = ring_buffer_event_data(event);
1023 entry->ip = ip;
1024
1025 memcpy(&entry->buf, str, size);
1026
1027 /* Add a newline if necessary */
1028 if (entry->buf[size - 1] != '\n') {
1029 entry->buf[size] = '\n';
1030 entry->buf[size + 1] = '\0';
1031 } else
1032 entry->buf[size] = '\0';
1033
1034 __buffer_unlock_commit(buffer, event);
1035 ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1036 out:
1037 ring_buffer_nest_end(buffer);
1038 return size;
1039 }
1040 EXPORT_SYMBOL_GPL(__trace_array_puts);
1041
1042 /**
1043 * __trace_puts - write a constant string into the trace buffer.
1044 * @ip: The address of the caller
1045 * @str: The constant string to write
1046 * @size: The size of the string.
1047 */
1048 int __trace_puts(unsigned long ip, const char *str, int size)
1049 {
1050 return __trace_array_puts(&global_trace, ip, str, size);
1051 }
1052 EXPORT_SYMBOL_GPL(__trace_puts);
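/*
 * Callers normally use the trace_puts() macro rather than calling this
 * directly; for build-time constant strings the macro maps to
 * __trace_bputs() below, otherwise to __trace_puts(). For example:
 *
 *	trace_puts("reached the slow path\n");
 */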
1053
1054 /**
1055 * __trace_bputs - write the pointer to a constant string into trace buffer
1056 * @ip: The address of the caller
1057 * @str: The constant string to write to the buffer to
1058 */
1059 int __trace_bputs(unsigned long ip, const char *str)
1060 {
1061 struct ring_buffer_event *event;
1062 struct trace_buffer *buffer;
1063 struct bputs_entry *entry;
1064 unsigned int trace_ctx;
1065 int size = sizeof(struct bputs_entry);
1066 int ret = 0;
1067
1068 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1069 return 0;
1070
1071 if (unlikely(tracing_selftest_running || tracing_disabled))
1072 return 0;
1073
1074 trace_ctx = tracing_gen_ctx();
1075 buffer = global_trace.array_buffer.buffer;
1076
1077 ring_buffer_nest_start(buffer);
1078 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1079 trace_ctx);
1080 if (!event)
1081 goto out;
1082
1083 entry = ring_buffer_event_data(event);
1084 entry->ip = ip;
1085 entry->str = str;
1086
1087 __buffer_unlock_commit(buffer, event);
1088 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1089
1090 ret = 1;
1091 out:
1092 ring_buffer_nest_end(buffer);
1093 return ret;
1094 }
1095 EXPORT_SYMBOL_GPL(__trace_bputs);
1096
1097 #ifdef CONFIG_TRACER_SNAPSHOT
1098 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1099 void *cond_data)
1100 {
1101 struct tracer *tracer = tr->current_trace;
1102 unsigned long flags;
1103
1104 if (in_nmi()) {
1105 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1106 trace_array_puts(tr, "*** snapshot is being ignored ***\n");
1107 return;
1108 }
1109
1110 if (!tr->allocated_snapshot) {
1111 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1112 trace_array_puts(tr, "*** stopping trace here! ***\n");
1113 tracer_tracing_off(tr);
1114 return;
1115 }
1116
1117 /* Note, snapshot can not be used when the tracer uses it */
1118 if (tracer->use_max_tr) {
1119 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1120 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1121 return;
1122 }
1123
1124 local_irq_save(flags);
1125 update_max_tr(tr, current, smp_processor_id(), cond_data);
1126 local_irq_restore(flags);
1127 }
1128
1129 void tracing_snapshot_instance(struct trace_array *tr)
1130 {
1131 tracing_snapshot_instance_cond(tr, NULL);
1132 }
1133
1134 /**
1135 * tracing_snapshot - take a snapshot of the current buffer.
1136 *
1137 * This causes a swap between the snapshot buffer and the current live
1138 * tracing buffer. You can use this to take snapshots of the live
1139 * trace when some condition is triggered, but continue to trace.
1140 *
1141 * Note, make sure to allocate the snapshot either with
1142 * tracing_snapshot_alloc(), or manually
1143 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1144 *
1145 * If the snapshot buffer is not allocated, it will stop tracing.
1146 * Basically making a permanent snapshot.
1147 */
1148 void tracing_snapshot(void)
1149 {
1150 struct trace_array *tr = &global_trace;
1151
1152 tracing_snapshot_instance(tr);
1153 }
1154 EXPORT_SYMBOL_GPL(tracing_snapshot);
1155
1156 /**
1157 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1158 * @tr: The tracing instance to snapshot
1159 * @cond_data: The data to be tested conditionally, and possibly saved
1160 *
1161 * This is the same as tracing_snapshot() except that the snapshot is
1162 * conditional - the snapshot will only happen if the
1163 * cond_snapshot.update() implementation receiving the cond_data
1164 * returns true, which means that the trace array's cond_snapshot
1165 * update() operation used the cond_data to determine whether the
1166 * snapshot should be taken, and if it was, presumably saved it along
1167 * with the snapshot.
1168 */
1169 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1170 {
1171 tracing_snapshot_instance_cond(tr, cond_data);
1172 }
1173 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1174
1175 /**
1176 * tracing_snapshot_cond_data - get the user data associated with a snapshot
1177 * @tr: The tracing instance
1178 *
1179 * When the user enables a conditional snapshot using
1180 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1181 * with the snapshot. This accessor is used to retrieve it.
1182 *
1183 * Should not be called from cond_snapshot.update(), since it takes
1184 * the tr->max_lock lock, which the code calling
1185 * cond_snapshot.update() has already taken.
1186 *
1187 * Returns the cond_data associated with the trace array's snapshot.
1188 */
1189 void *tracing_cond_snapshot_data(struct trace_array *tr)
1190 {
1191 void *cond_data = NULL;
1192
1193 local_irq_disable();
1194 arch_spin_lock(&tr->max_lock);
1195
1196 if (tr->cond_snapshot)
1197 cond_data = tr->cond_snapshot->cond_data;
1198
1199 arch_spin_unlock(&tr->max_lock);
1200 local_irq_enable();
1201
1202 return cond_data;
1203 }
1204 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1205
1206 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1207 struct array_buffer *size_buf, int cpu_id);
1208 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1209
1210 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1211 {
1212 int ret;
1213
1214 if (!tr->allocated_snapshot) {
1215
1216 /* allocate spare buffer */
1217 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1218 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1219 if (ret < 0)
1220 return ret;
1221
1222 tr->allocated_snapshot = true;
1223 }
1224
1225 return 0;
1226 }
1227
1228 static void free_snapshot(struct trace_array *tr)
1229 {
1230 /*
1231 * We don't free the ring buffer; instead, we resize it because
1232 * the max_tr ring buffer has some state (e.g. ring->clock) and
1233 * we want to preserve it.
1234 */
1235 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1236 set_buffer_entries(&tr->max_buffer, 1);
1237 tracing_reset_online_cpus(&tr->max_buffer);
1238 tr->allocated_snapshot = false;
1239 }
1240
1241 /**
1242 * tracing_alloc_snapshot - allocate snapshot buffer.
1243 *
1244 * This only allocates the snapshot buffer if it isn't already
1245 * allocated - it doesn't also take a snapshot.
1246 *
1247 * This is meant to be used in cases where the snapshot buffer needs
1248 * to be set up for events that can't sleep but need to be able to
1249 * trigger a snapshot.
1250 */
1251 int tracing_alloc_snapshot(void)
1252 {
1253 struct trace_array *tr = &global_trace;
1254 int ret;
1255
1256 ret = tracing_alloc_snapshot_instance(tr);
1257 WARN_ON(ret < 0);
1258
1259 return ret;
1260 }
1261 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1262
1263 /**
1264 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1265 *
1266 * This is similar to tracing_snapshot(), but it will allocate the
1267 * snapshot buffer if it isn't already allocated. Use this only
1268 * where it is safe to sleep, as the allocation may sleep.
1269 *
1270 * This causes a swap between the snapshot buffer and the current live
1271 * tracing buffer. You can use this to take snapshots of the live
1272 * trace when some condition is triggered, but continue to trace.
1273 */
1274 void tracing_snapshot_alloc(void)
1275 {
1276 int ret;
1277
1278 ret = tracing_alloc_snapshot();
1279 if (ret < 0)
1280 return;
1281
1282 tracing_snapshot();
1283 }
1284 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
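/*
 * Typical in-kernel usage sketch for the two calls above: allocate the
 * spare buffer once from a context that may sleep, then trigger snapshots
 * from the suspect path.
 *
 *	tracing_alloc_snapshot();	... may sleep, only allocates ...
 *	...
 *	if (suspicious_condition)
 *		tracing_snapshot();	... swaps live and snapshot buffers ...
 */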
1285
1286 /**
1287 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1288 * @tr: The tracing instance
1289 * @cond_data: User data to associate with the snapshot
1290 * @update: Implementation of the cond_snapshot update function
1291 *
1292 * Check whether the conditional snapshot for the given instance has
1293 * already been enabled, or if the current tracer is already using a
1294 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1295 * save the cond_data and update function inside.
1296 *
1297 * Returns 0 if successful, error otherwise.
1298 */
1299 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1300 cond_update_fn_t update)
1301 {
1302 struct cond_snapshot *cond_snapshot;
1303 int ret = 0;
1304
1305 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1306 if (!cond_snapshot)
1307 return -ENOMEM;
1308
1309 cond_snapshot->cond_data = cond_data;
1310 cond_snapshot->update = update;
1311
1312 mutex_lock(&trace_types_lock);
1313
1314 ret = tracing_alloc_snapshot_instance(tr);
1315 if (ret)
1316 goto fail_unlock;
1317
1318 if (tr->current_trace->use_max_tr) {
1319 ret = -EBUSY;
1320 goto fail_unlock;
1321 }
1322
1323 /*
1324 * The cond_snapshot can only change to NULL without the
1325 * trace_types_lock. We don't care if we race with it going
1326 * to NULL, but we want to make sure that it's not set to
1327 * something other than NULL when we get here, which we can
1328 * do safely with only holding the trace_types_lock and not
1329 * having to take the max_lock.
1330 */
1331 if (tr->cond_snapshot) {
1332 ret = -EBUSY;
1333 goto fail_unlock;
1334 }
1335
1336 local_irq_disable();
1337 arch_spin_lock(&tr->max_lock);
1338 tr->cond_snapshot = cond_snapshot;
1339 arch_spin_unlock(&tr->max_lock);
1340 local_irq_enable();
1341
1342 mutex_unlock(&trace_types_lock);
1343
1344 return ret;
1345
1346 fail_unlock:
1347 mutex_unlock(&trace_types_lock);
1348 kfree(cond_snapshot);
1349 return ret;
1350 }
1351 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1352
1353 /**
1354 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1355 * @tr: The tracing instance
1356 *
1357 * Check whether the conditional snapshot for the given instance is
1358 * enabled; if so, free the cond_snapshot associated with it,
1359 * otherwise return -EINVAL.
1360 *
1361 * Returns 0 if successful, error otherwise.
1362 */
1363 int tracing_snapshot_cond_disable(struct trace_array *tr)
1364 {
1365 int ret = 0;
1366
1367 local_irq_disable();
1368 arch_spin_lock(&tr->max_lock);
1369
1370 if (!tr->cond_snapshot)
1371 ret = -EINVAL;
1372 else {
1373 kfree(tr->cond_snapshot);
1374 tr->cond_snapshot = NULL;
1375 }
1376
1377 arch_spin_unlock(&tr->max_lock);
1378 local_irq_enable();
1379
1380 return ret;
1381 }
1382 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
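/*
 * Usage sketch for the conditional snapshot API (the callback prototype is
 * cond_update_fn_t from trace.h; "my_state" and its fields are made up):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *state = cond_data;
 *
 *		return ++state->hits >= state->threshold;
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_state, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_state);	... snapshots only if my_update() returns true ...
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */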
1383 #else
1384 void tracing_snapshot(void)
1385 {
1386 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1387 }
1388 EXPORT_SYMBOL_GPL(tracing_snapshot);
1389 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1390 {
1391 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1392 }
1393 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1394 int tracing_alloc_snapshot(void)
1395 {
1396 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1397 return -ENODEV;
1398 }
1399 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1400 void tracing_snapshot_alloc(void)
1401 {
1402 /* Give warning */
1403 tracing_snapshot();
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1406 void *tracing_cond_snapshot_data(struct trace_array *tr)
1407 {
1408 return NULL;
1409 }
1410 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1411 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1412 {
1413 return -ENODEV;
1414 }
1415 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1416 int tracing_snapshot_cond_disable(struct trace_array *tr)
1417 {
1418 return false;
1419 }
1420 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1421 #define free_snapshot(tr) do { } while (0)
1422 #endif /* CONFIG_TRACER_SNAPSHOT */
1423
1424 void tracer_tracing_off(struct trace_array *tr)
1425 {
1426 if (tr->array_buffer.buffer)
1427 ring_buffer_record_off(tr->array_buffer.buffer);
1428 /*
1429 * This flag is looked at when buffers haven't been allocated
1430 * yet, or by some tracers (like irqsoff) that just want to
1431 * know if the ring buffer has been disabled, and can handle
1432 * the race where it gets disabled while we still do a record.
1433 * As the check is in the fast path of the tracers, it is more
1434 * important to be fast than accurate.
1435 */
1436 tr->buffer_disabled = 1;
1437 /* Make the flag seen by readers */
1438 smp_wmb();
1439 }
1440
1441 /**
1442 * tracing_off - turn off tracing buffers
1443 *
1444 * This function stops the tracing buffers from recording data.
1445 * It does not disable any overhead the tracers themselves may
1446 * be causing. This function simply causes all recording to
1447 * the ring buffers to fail.
1448 */
1449 void tracing_off(void)
1450 {
1451 tracer_tracing_off(&global_trace);
1452 }
1453 EXPORT_SYMBOL_GPL(tracing_off);
1454
1455 void disable_trace_on_warning(void)
1456 {
1457 if (__disable_trace_on_warning) {
1458 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1459 "Disabling tracing due to warning\n");
1460 tracing_off();
1461 }
1462 }
1463
1464 /**
1465 * tracer_tracing_is_on - show real state of ring buffer enabled
1466 * @tr : the trace array to know if ring buffer is enabled
1467 *
1468 * Shows real state of the ring buffer if it is enabled or not.
1469 */
1470 bool tracer_tracing_is_on(struct trace_array *tr)
1471 {
1472 if (tr->array_buffer.buffer)
1473 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1474 return !tr->buffer_disabled;
1475 }
1476
1477 /**
1478 * tracing_is_on - show state of ring buffers enabled
1479 */
1480 int tracing_is_on(void)
1481 {
1482 return tracer_tracing_is_on(&global_trace);
1483 }
1484 EXPORT_SYMBOL_GPL(tracing_is_on);
1485
1486 static int __init set_buf_size(char *str)
1487 {
1488 unsigned long buf_size;
1489
1490 if (!str)
1491 return 0;
1492 buf_size = memparse(str, &str);
1493 /*
1494 * nr_entries can not be zero and the startup
1495 * tests require some buffer space. Therefore
1496 * ensure we have at least 4096 bytes of buffer.
1497 */
1498 trace_buf_size = max(4096UL, buf_size);
1499 return 1;
1500 }
1501 __setup("trace_buf_size=", set_buf_size);
1502
1503 static int __init set_tracing_thresh(char *str)
1504 {
1505 unsigned long threshold;
1506 int ret;
1507
1508 if (!str)
1509 return 0;
1510 ret = kstrtoul(str, 0, &threshold);
1511 if (ret < 0)
1512 return 0;
1513 tracing_thresh = threshold * 1000;
1514 return 1;
1515 }
1516 __setup("tracing_thresh=", set_tracing_thresh);
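/*
 * Example boot-time configuration combining the parameters parsed in this
 * file (the values are illustrative):
 *
 *	ftrace=function trace_options=stacktrace trace_buf_size=16M tracing_thresh=100 traceoff_on_warning
 *
 * trace_buf_size= accepts memparse() suffixes (K, M, G); tracing_thresh= is
 * given in microseconds and stored internally in nanoseconds.
 */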
1517
1518 unsigned long nsecs_to_usecs(unsigned long nsecs)
1519 {
1520 return nsecs / 1000;
1521 }
1522
1523 /*
1524 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1525 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1526 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1527 * of strings in the order that the evals (enum) were defined.
1528 */
1529 #undef C
1530 #define C(a, b) b
1531
1532 /* These must match the bit positions in trace_iterator_flags */
1533 static const char *trace_options[] = {
1534 TRACE_FLAGS
1535 NULL
1536 };
1537
1538 static struct {
1539 u64 (*func)(void);
1540 const char *name;
1541 int in_ns; /* is this clock in nanoseconds? */
1542 } trace_clocks[] = {
1543 { trace_clock_local, "local", 1 },
1544 { trace_clock_global, "global", 1 },
1545 { trace_clock_counter, "counter", 0 },
1546 { trace_clock_jiffies, "uptime", 0 },
1547 { trace_clock, "perf", 1 },
1548 { ktime_get_mono_fast_ns, "mono", 1 },
1549 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1550 { ktime_get_boot_fast_ns, "boot", 1 },
1551 ARCH_TRACE_CLOCKS
1552 };
1553
1554 bool trace_clock_in_ns(struct trace_array *tr)
1555 {
1556 if (trace_clocks[tr->clock_id].in_ns)
1557 return true;
1558
1559 return false;
1560 }
1561
1562 /*
1563 * trace_parser_get_init - gets the buffer for trace parser
1564 */
1565 int trace_parser_get_init(struct trace_parser *parser, int size)
1566 {
1567 memset(parser, 0, sizeof(*parser));
1568
1569 parser->buffer = kmalloc(size, GFP_KERNEL);
1570 if (!parser->buffer)
1571 return 1;
1572
1573 parser->size = size;
1574 return 0;
1575 }
1576
1577 /*
1578 * trace_parser_put - frees the buffer for trace parser
1579 */
1580 void trace_parser_put(struct trace_parser *parser)
1581 {
1582 kfree(parser->buffer);
1583 parser->buffer = NULL;
1584 }
1585
1586 /*
1587 * trace_get_user - reads the user input string separated by space
1588 * (matched by isspace(ch))
1589 *
1590 * For each string found the 'struct trace_parser' is updated,
1591 * and the function returns.
1592 *
1593 * Returns number of bytes read.
1594 *
1595 * See kernel/trace/trace.h for 'struct trace_parser' details.
1596 */
1597 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1598 size_t cnt, loff_t *ppos)
1599 {
1600 char ch;
1601 size_t read = 0;
1602 ssize_t ret;
1603
1604 if (!*ppos)
1605 trace_parser_clear(parser);
1606
1607 ret = get_user(ch, ubuf++);
1608 if (ret)
1609 goto out;
1610
1611 read++;
1612 cnt--;
1613
1614 /*
1615 * If the parser finished with the last write, skip any leading
1616 * white space; otherwise continue reading the user input without skipping spaces.
1617 */
1618 if (!parser->cont) {
1619 /* skip white space */
1620 while (cnt && isspace(ch)) {
1621 ret = get_user(ch, ubuf++);
1622 if (ret)
1623 goto out;
1624 read++;
1625 cnt--;
1626 }
1627
1628 parser->idx = 0;
1629
1630 /* only spaces were written */
1631 if (isspace(ch) || !ch) {
1632 *ppos += read;
1633 ret = read;
1634 goto out;
1635 }
1636 }
1637
1638 /* read the non-space input */
1639 while (cnt && !isspace(ch) && ch) {
1640 if (parser->idx < parser->size - 1)
1641 parser->buffer[parser->idx++] = ch;
1642 else {
1643 ret = -EINVAL;
1644 goto out;
1645 }
1646 ret = get_user(ch, ubuf++);
1647 if (ret)
1648 goto out;
1649 read++;
1650 cnt--;
1651 }
1652
1653 /* We either got finished input or we have to wait for another call. */
1654 if (isspace(ch) || !ch) {
1655 parser->buffer[parser->idx] = 0;
1656 parser->cont = false;
1657 } else if (parser->idx < parser->size - 1) {
1658 parser->cont = true;
1659 parser->buffer[parser->idx++] = ch;
1660 /* Make sure the parsed string always terminates with '\0'. */
1661 parser->buffer[parser->idx] = 0;
1662 } else {
1663 ret = -EINVAL;
1664 goto out;
1665 }
1666
1667 *ppos += read;
1668 ret = read;
1669
1670 out:
1671 return ret;
1672 }
1673
1674 /* TODO add a seq_buf_to_buffer() */
1675 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1676 {
1677 int len;
1678
1679 if (trace_seq_used(s) <= s->seq.readpos)
1680 return -EBUSY;
1681
1682 len = trace_seq_used(s) - s->seq.readpos;
1683 if (cnt > len)
1684 cnt = len;
1685 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1686
1687 s->seq.readpos += cnt;
1688 return cnt;
1689 }
1690
1691 unsigned long __read_mostly tracing_thresh;
1692
1693 #ifdef CONFIG_TRACER_MAX_TRACE
1694 static const struct file_operations tracing_max_lat_fops;
1695
1696 #ifdef LATENCY_FS_NOTIFY
1697
1698 static struct workqueue_struct *fsnotify_wq;
1699
1700 static void latency_fsnotify_workfn(struct work_struct *work)
1701 {
1702 struct trace_array *tr = container_of(work, struct trace_array,
1703 fsnotify_work);
1704 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1705 }
1706
1707 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1708 {
1709 struct trace_array *tr = container_of(iwork, struct trace_array,
1710 fsnotify_irqwork);
1711 queue_work(fsnotify_wq, &tr->fsnotify_work);
1712 }
1713
1714 static void trace_create_maxlat_file(struct trace_array *tr,
1715 struct dentry *d_tracer)
1716 {
1717 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1718 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1719 tr->d_max_latency = trace_create_file("tracing_max_latency",
1720 TRACE_MODE_WRITE,
1721 d_tracer, tr,
1722 &tracing_max_lat_fops);
1723 }
1724
1725 __init static int latency_fsnotify_init(void)
1726 {
1727 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1728 WQ_UNBOUND | WQ_HIGHPRI, 0);
1729 if (!fsnotify_wq) {
1730 pr_err("Unable to allocate tr_max_lat_wq\n");
1731 return -ENOMEM;
1732 }
1733 return 0;
1734 }
1735
1736 late_initcall_sync(latency_fsnotify_init);
1737
1738 void latency_fsnotify(struct trace_array *tr)
1739 {
1740 if (!fsnotify_wq)
1741 return;
1742 /*
1743 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1744 * possible that we are called from __schedule() or do_idle(), which
1745 * could cause a deadlock.
1746 */
1747 irq_work_queue(&tr->fsnotify_irqwork);
1748 }
1749
1750 #else /* !LATENCY_FS_NOTIFY */
1751
1752 #define trace_create_maxlat_file(tr, d_tracer) \
1753 trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \
1754 d_tracer, tr, &tracing_max_lat_fops)
1755
1756 #endif
1757
1758 /*
1759 * Copy the new maximum trace into the separate maximum-trace
1760 * structure. (this way the maximum trace is permanently saved,
1761 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1762 */
1763 static void
1764 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1765 {
1766 struct array_buffer *trace_buf = &tr->array_buffer;
1767 struct array_buffer *max_buf = &tr->max_buffer;
1768 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1769 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1770
1771 max_buf->cpu = cpu;
1772 max_buf->time_start = data->preempt_timestamp;
1773
1774 max_data->saved_latency = tr->max_latency;
1775 max_data->critical_start = data->critical_start;
1776 max_data->critical_end = data->critical_end;
1777
1778 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1779 max_data->pid = tsk->pid;
1780 /*
1781 * If tsk == current, then use current_uid(), as that does not use
1782 * RCU. The irq tracer can be called out of RCU scope.
1783 */
1784 if (tsk == current)
1785 max_data->uid = current_uid();
1786 else
1787 max_data->uid = task_uid(tsk);
1788
1789 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1790 max_data->policy = tsk->policy;
1791 max_data->rt_priority = tsk->rt_priority;
1792
1793 /* record this task's comm */
1794 tracing_record_cmdline(tsk);
1795 latency_fsnotify(tr);
1796 }
1797
1798 /**
1799 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1800 * @tr: tracer
1801 * @tsk: the task with the latency
1802 * @cpu: The cpu that initiated the trace.
1803 * @cond_data: User data associated with a conditional snapshot
1804 *
1805 * Flip the buffers between the @tr and the max_tr and record information
1806 * about which task was the cause of this latency.
1807 */
1808 void
1809 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1810 void *cond_data)
1811 {
1812 if (tr->stop_count)
1813 return;
1814
1815 WARN_ON_ONCE(!irqs_disabled());
1816
1817 if (!tr->allocated_snapshot) {
1818 /* Only the nop tracer should hit this when disabling */
1819 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1820 return;
1821 }
1822
1823 arch_spin_lock(&tr->max_lock);
1824
1825 /* Inherit the recordable setting from array_buffer */
1826 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1827 ring_buffer_record_on(tr->max_buffer.buffer);
1828 else
1829 ring_buffer_record_off(tr->max_buffer.buffer);
1830
1831 #ifdef CONFIG_TRACER_SNAPSHOT
1832 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1833 arch_spin_unlock(&tr->max_lock);
1834 return;
1835 }
1836 #endif
1837 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1838
1839 __update_max_tr(tr, tsk, cpu);
1840
1841 arch_spin_unlock(&tr->max_lock);
1842
1843 /* Any waiters on the old snapshot buffer need to wake up */
1844 ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1845 }
1846
1847 /**
1848 * update_max_tr_single - only copy one trace over, and reset the rest
1849 * @tr: tracer
1850 * @tsk: task with the latency
1851 * @cpu: the cpu of the buffer to copy.
1852 *
1853 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1854 */
1855 void
1856 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1857 {
1858 int ret;
1859
1860 if (tr->stop_count)
1861 return;
1862
1863 WARN_ON_ONCE(!irqs_disabled());
1864 if (!tr->allocated_snapshot) {
1865 /* Only the nop tracer should hit this when disabling */
1866 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1867 return;
1868 }
1869
1870 arch_spin_lock(&tr->max_lock);
1871
1872 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1873
1874 if (ret == -EBUSY) {
1875 /*
1876 * We failed to swap the buffer due to a commit taking
1877 * place on this CPU. We fail to record, but we reset
1878 * the max trace buffer (no one writes directly to it)
1879 * and flag that it failed.
1880 * The swap can also fail if a buffer resize is in progress.
1881 */
1882 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1883 "Failed to swap buffers due to commit or resize in progress\n");
1884 }
1885
1886 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1887
1888 __update_max_tr(tr, tsk, cpu);
1889 arch_spin_unlock(&tr->max_lock);
1890 }
1891
1892 #endif /* CONFIG_TRACER_MAX_TRACE */
1893
1894 static int wait_on_pipe(struct trace_iterator *iter, int full)
1895 {
1896 int ret;
1897
1898 /* Iterators are static, they should be filled or empty */
1899 if (trace_buffer_iter(iter, iter->cpu_file))
1900 return 0;
1901
1902 ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1903
1904 #ifdef CONFIG_TRACER_MAX_TRACE
1905 /*
1906 * Make sure this is still the snapshot buffer, as if a snapshot were
1907 * to happen, this would now be the main buffer.
1908 */
1909 if (iter->snapshot)
1910 iter->array_buffer = &iter->tr->max_buffer;
1911 #endif
1912 return ret;
1913 }
1914
1915 #ifdef CONFIG_FTRACE_STARTUP_TEST
1916 static bool selftests_can_run;
1917
1918 struct trace_selftests {
1919 struct list_head list;
1920 struct tracer *type;
1921 };
1922
1923 static LIST_HEAD(postponed_selftests);
1924
1925 static int save_selftest(struct tracer *type)
1926 {
1927 struct trace_selftests *selftest;
1928
1929 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1930 if (!selftest)
1931 return -ENOMEM;
1932
1933 selftest->type = type;
1934 list_add(&selftest->list, &postponed_selftests);
1935 return 0;
1936 }
1937
1938 static int run_tracer_selftest(struct tracer *type)
1939 {
1940 struct trace_array *tr = &global_trace;
1941 struct tracer *saved_tracer = tr->current_trace;
1942 int ret;
1943
1944 if (!type->selftest || tracing_selftest_disabled)
1945 return 0;
1946
1947 /*
1948 * If a tracer registers early in boot up (before scheduling is
1949 * initialized and such), then do not run its selftests yet.
1950 * Instead, run it a little later in the boot process.
1951 */
1952 if (!selftests_can_run)
1953 return save_selftest(type);
1954
1955 if (!tracing_is_on()) {
1956 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1957 type->name);
1958 return 0;
1959 }
1960
1961 /*
1962 * Run a selftest on this tracer.
1963 * Here we reset the trace buffer, and set the current
1964 * tracer to be this tracer. The tracer can then run some
1965 * internal tracing to verify that everything is in order.
1966 * If we fail, we do not register this tracer.
1967 */
1968 tracing_reset_online_cpus(&tr->array_buffer);
1969
1970 tr->current_trace = type;
1971
1972 #ifdef CONFIG_TRACER_MAX_TRACE
1973 if (type->use_max_tr) {
1974 /* If we expanded the buffers, make sure the max is expanded too */
1975 if (ring_buffer_expanded)
1976 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1977 RING_BUFFER_ALL_CPUS);
1978 tr->allocated_snapshot = true;
1979 }
1980 #endif
1981
1982 /* the test is responsible for initializing and enabling */
1983 pr_info("Testing tracer %s: ", type->name);
1984 ret = type->selftest(type, tr);
1985 /* the test is responsible for resetting too */
1986 tr->current_trace = saved_tracer;
1987 if (ret) {
1988 printk(KERN_CONT "FAILED!\n");
1989 /* Add the warning after printing 'FAILED' */
1990 WARN_ON(1);
1991 return -1;
1992 }
1993 /* Only reset on passing, to avoid touching corrupted buffers */
1994 tracing_reset_online_cpus(&tr->array_buffer);
1995
1996 #ifdef CONFIG_TRACER_MAX_TRACE
1997 if (type->use_max_tr) {
1998 tr->allocated_snapshot = false;
1999
2000 /* Shrink the max buffer again */
2001 if (ring_buffer_expanded)
2002 ring_buffer_resize(tr->max_buffer.buffer, 1,
2003 RING_BUFFER_ALL_CPUS);
2004 }
2005 #endif
2006
2007 printk(KERN_CONT "PASSED\n");
2008 return 0;
2009 }
2010
2011 static __init int init_trace_selftests(void)
2012 {
2013 struct trace_selftests *p, *n;
2014 struct tracer *t, **last;
2015 int ret;
2016
2017 selftests_can_run = true;
2018
2019 mutex_lock(&trace_types_lock);
2020
2021 if (list_empty(&postponed_selftests))
2022 goto out;
2023
2024 pr_info("Running postponed tracer tests:\n");
2025
2026 tracing_selftest_running = true;
2027 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2028 /* This loop can take minutes when sanitizers are enabled, so
2029 * let's make sure we allow RCU processing.
2030 */
2031 cond_resched();
2032 ret = run_tracer_selftest(p->type);
2033 /* If the test fails, then warn and remove from available_tracers */
2034 if (ret < 0) {
2035 WARN(1, "tracer: %s failed selftest, disabling\n",
2036 p->type->name);
2037 last = &trace_types;
2038 for (t = trace_types; t; t = t->next) {
2039 if (t == p->type) {
2040 *last = t->next;
2041 break;
2042 }
2043 last = &t->next;
2044 }
2045 }
2046 list_del(&p->list);
2047 kfree(p);
2048 }
2049 tracing_selftest_running = false;
2050
2051 out:
2052 mutex_unlock(&trace_types_lock);
2053
2054 return 0;
2055 }
2056 core_initcall(init_trace_selftests);
2057 #else
2058 static inline int run_tracer_selftest(struct tracer *type)
2059 {
2060 return 0;
2061 }
2062 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2063
2064 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2065
2066 static void __init apply_trace_boot_options(void);
2067
2068 /**
2069 * register_tracer - register a tracer with the ftrace system.
2070 * @type: the plugin for the tracer
2071 *
2072 * Register a new plugin tracer.
2073 */
2074 int __init register_tracer(struct tracer *type)
2075 {
2076 struct tracer *t;
2077 int ret = 0;
2078
2079 if (!type->name) {
2080 pr_info("Tracer must have a name\n");
2081 return -1;
2082 }
2083
2084 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2085 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2086 return -1;
2087 }
2088
2089 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2090 pr_warn("Can not register tracer %s due to lockdown\n",
2091 type->name);
2092 return -EPERM;
2093 }
2094
2095 mutex_lock(&trace_types_lock);
2096
2097 tracing_selftest_running = true;
2098
2099 for (t = trace_types; t; t = t->next) {
2100 if (strcmp(type->name, t->name) == 0) {
2101 /* already found */
2102 pr_info("Tracer %s already registered\n",
2103 type->name);
2104 ret = -1;
2105 goto out;
2106 }
2107 }
2108
2109 if (!type->set_flag)
2110 type->set_flag = &dummy_set_flag;
2111 if (!type->flags) {
2112 /* allocate a dummy tracer_flags */
2113 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2114 if (!type->flags) {
2115 ret = -ENOMEM;
2116 goto out;
2117 }
2118 type->flags->val = 0;
2119 type->flags->opts = dummy_tracer_opt;
2120 } else
2121 if (!type->flags->opts)
2122 type->flags->opts = dummy_tracer_opt;
2123
2124 /* store the tracer for __set_tracer_option */
2125 type->flags->trace = type;
2126
2127 ret = run_tracer_selftest(type);
2128 if (ret < 0)
2129 goto out;
2130
2131 type->next = trace_types;
2132 trace_types = type;
2133 add_tracer_options(&global_trace, type);
2134
2135 out:
2136 tracing_selftest_running = false;
2137 mutex_unlock(&trace_types_lock);
2138
2139 if (ret || !default_bootup_tracer)
2140 goto out_unlock;
2141
2142 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2143 goto out_unlock;
2144
2145 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2146 /* Do we want this tracer to start on bootup? */
2147 tracing_set_tracer(&global_trace, type->name);
2148 default_bootup_tracer = NULL;
2149
2150 apply_trace_boot_options();
2151
2152 /* disable other selftests, since this will break it. */
2153 disable_tracing_selftest("running a tracer");
2154
2155 out_unlock:
2156 return ret;
2157 }
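
/*
 * Illustrative sketch only (nothing below is wired up): what a minimal
 * built-in tracer looks like from the registration side.  The example_*
 * names are hypothetical; only .name is mandatory, the other callbacks
 * may be left NULL.
 */
static int example_tracer_init(struct trace_array *tr)
{
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
}

static struct tracer example_tracer = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

static int __init __maybe_unused example_tracer_register(void)
{
	/* register_tracer() is __init, so built-in tracers register at boot */
	return register_tracer(&example_tracer);
}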
2158
2159 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2160 {
2161 struct trace_buffer *buffer = buf->buffer;
2162
2163 if (!buffer)
2164 return;
2165
2166 ring_buffer_record_disable(buffer);
2167
2168 /* Make sure all commits have finished */
2169 synchronize_rcu();
2170 ring_buffer_reset_cpu(buffer, cpu);
2171
2172 ring_buffer_record_enable(buffer);
2173 }
2174
2175 void tracing_reset_online_cpus(struct array_buffer *buf)
2176 {
2177 struct trace_buffer *buffer = buf->buffer;
2178
2179 if (!buffer)
2180 return;
2181
2182 ring_buffer_record_disable(buffer);
2183
2184 /* Make sure all commits have finished */
2185 synchronize_rcu();
2186
2187 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2188
2189 ring_buffer_reset_online_cpus(buffer);
2190
2191 ring_buffer_record_enable(buffer);
2192 }
2193
2194 /* Must have trace_types_lock held */
2195 void tracing_reset_all_online_cpus_unlocked(void)
2196 {
2197 struct trace_array *tr;
2198
2199 lockdep_assert_held(&trace_types_lock);
2200
2201 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2202 if (!tr->clear_trace)
2203 continue;
2204 tr->clear_trace = false;
2205 tracing_reset_online_cpus(&tr->array_buffer);
2206 #ifdef CONFIG_TRACER_MAX_TRACE
2207 tracing_reset_online_cpus(&tr->max_buffer);
2208 #endif
2209 }
2210 }
2211
2212 void tracing_reset_all_online_cpus(void)
2213 {
2214 mutex_lock(&trace_types_lock);
2215 tracing_reset_all_online_cpus_unlocked();
2216 mutex_unlock(&trace_types_lock);
2217 }
2218
2219 /*
2220 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2221 * is the tgid last observed corresponding to pid=i.
2222 */
2223 static int *tgid_map;
2224
2225 /* The maximum valid index into tgid_map. */
2226 static size_t tgid_map_max;
2227
2228 #define SAVED_CMDLINES_DEFAULT 128
2229 #define NO_CMDLINE_MAP UINT_MAX
2230 /*
2231 * Preemption must be disabled before acquiring trace_cmdline_lock.
2232 * The various trace_arrays' max_lock must be acquired in a context
2233 * where interrupt is disabled.
2234 */
2235 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2236 struct saved_cmdlines_buffer {
2237 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2238 unsigned *map_cmdline_to_pid;
2239 unsigned cmdline_num;
2240 int cmdline_idx;
2241 char saved_cmdlines[];
2242 };
2243 static struct saved_cmdlines_buffer *savedcmd;
2244
2245 static inline char *get_saved_cmdlines(int idx)
2246 {
2247 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2248 }
2249
2250 static inline void set_cmdline(int idx, const char *cmdline)
2251 {
2252 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2253 }
2254
2255 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
2256 {
2257 int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
2258
2259 kfree(s->map_cmdline_to_pid);
2260 kmemleak_free(s);
2261 free_pages((unsigned long)s, order);
2262 }
2263
2264 static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
2265 {
2266 struct saved_cmdlines_buffer *s;
2267 struct page *page;
2268 int orig_size, size;
2269 int order;
2270
2271 /* Figure out how much is needed to hold the given number of cmdlines */
2272 orig_size = sizeof(*s) + val * TASK_COMM_LEN;
2273 order = get_order(orig_size);
2274 size = 1 << (order + PAGE_SHIFT);
2275 page = alloc_pages(GFP_KERNEL, order);
2276 if (!page)
2277 return NULL;
2278
2279 s = page_address(page);
2280 kmemleak_alloc(s, size, 1, GFP_KERNEL);
2281 memset(s, 0, sizeof(*s));
2282
2283 /* Round up to actual allocation */
2284 val = (size - sizeof(*s)) / TASK_COMM_LEN;
2285 s->cmdline_num = val;
2286
2287 s->map_cmdline_to_pid = kmalloc_array(val,
2288 sizeof(*s->map_cmdline_to_pid),
2289 GFP_KERNEL);
2290 if (!s->map_cmdline_to_pid) {
2291 free_saved_cmdlines_buffer(s);
2292 return NULL;
2293 }
2294
2295 s->cmdline_idx = 0;
2296 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2297 sizeof(s->map_pid_to_cmdline));
2298 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2299 val * sizeof(*s->map_cmdline_to_pid));
2300
2301 return s;
2302 }
2303
2304 static int trace_create_savedcmd(void)
2305 {
2306 savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
2307
2308 return savedcmd ? 0 : -ENOMEM;
2309 }
2310
2311 int is_tracing_stopped(void)
2312 {
2313 return global_trace.stop_count;
2314 }
2315
2316 static void tracing_start_tr(struct trace_array *tr)
2317 {
2318 struct trace_buffer *buffer;
2319 unsigned long flags;
2320
2321 if (tracing_disabled)
2322 return;
2323
2324 raw_spin_lock_irqsave(&tr->start_lock, flags);
2325 if (--tr->stop_count) {
2326 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2327 /* Someone screwed up their debugging */
2328 tr->stop_count = 0;
2329 }
2330 goto out;
2331 }
2332
2333 /* Prevent the buffers from switching */
2334 arch_spin_lock(&tr->max_lock);
2335
2336 buffer = tr->array_buffer.buffer;
2337 if (buffer)
2338 ring_buffer_record_enable(buffer);
2339
2340 #ifdef CONFIG_TRACER_MAX_TRACE
2341 buffer = tr->max_buffer.buffer;
2342 if (buffer)
2343 ring_buffer_record_enable(buffer);
2344 #endif
2345
2346 arch_spin_unlock(&tr->max_lock);
2347
2348 out:
2349 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2350 }
2351
2352 /**
2353 * tracing_start - quick start of the tracer
2354 *
2355 * If tracing is enabled but was stopped by tracing_stop,
2356 * this will start the tracer back up.
2357 */
2358 void tracing_start(void)
2359
2360 {
2361 return tracing_start_tr(&global_trace);
2362 }
2363
2364 static void tracing_stop_tr(struct trace_array *tr)
2365 {
2366 struct trace_buffer *buffer;
2367 unsigned long flags;
2368
2369 raw_spin_lock_irqsave(&tr->start_lock, flags);
2370 if (tr->stop_count++)
2371 goto out;
2372
2373 /* Prevent the buffers from switching */
2374 arch_spin_lock(&tr->max_lock);
2375
2376 buffer = tr->array_buffer.buffer;
2377 if (buffer)
2378 ring_buffer_record_disable(buffer);
2379
2380 #ifdef CONFIG_TRACER_MAX_TRACE
2381 buffer = tr->max_buffer.buffer;
2382 if (buffer)
2383 ring_buffer_record_disable(buffer);
2384 #endif
2385
2386 arch_spin_unlock(&tr->max_lock);
2387
2388 out:
2389 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2390 }
2391
2392 /**
2393 * tracing_stop - quick stop of the tracer
2394 *
2395 * Light weight way to stop tracing. Use in conjunction with
2396 * tracing_start.
2397 */
2398 void tracing_stop(void)
2399 {
2400 return tracing_stop_tr(&global_trace);
2401 }
2402
2403 static int trace_save_cmdline(struct task_struct *tsk)
2404 {
2405 unsigned tpid, idx;
2406
2407 /* treat recording of idle task as a success */
2408 if (!tsk->pid)
2409 return 1;
2410
2411 tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2412
2413 /*
2414 * It's not the end of the world if we don't get
2415 * the lock, but we also don't want to spin
2416 * nor do we want to disable interrupts,
2417 * so if we miss here, then better luck next time.
2418 *
2419 * This is called from within the scheduler and from wakeups, so interrupts
2420 * had better be disabled and the run queue lock held.
2421 */
2422 lockdep_assert_preemption_disabled();
2423 if (!arch_spin_trylock(&trace_cmdline_lock))
2424 return 0;
2425
2426 idx = savedcmd->map_pid_to_cmdline[tpid];
2427 if (idx == NO_CMDLINE_MAP) {
2428 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2429
2430 savedcmd->map_pid_to_cmdline[tpid] = idx;
2431 savedcmd->cmdline_idx = idx;
2432 }
2433
2434 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2435 set_cmdline(idx, tsk->comm);
2436
2437 arch_spin_unlock(&trace_cmdline_lock);
2438
2439 return 1;
2440 }
2441
2442 static void __trace_find_cmdline(int pid, char comm[])
2443 {
2444 unsigned map;
2445 int tpid;
2446
2447 if (!pid) {
2448 strcpy(comm, "<idle>");
2449 return;
2450 }
2451
2452 if (WARN_ON_ONCE(pid < 0)) {
2453 strcpy(comm, "<XXX>");
2454 return;
2455 }
2456
2457 tpid = pid & (PID_MAX_DEFAULT - 1);
2458 map = savedcmd->map_pid_to_cmdline[tpid];
2459 if (map != NO_CMDLINE_MAP) {
2460 tpid = savedcmd->map_cmdline_to_pid[map];
2461 if (tpid == pid) {
2462 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2463 return;
2464 }
2465 }
2466 strcpy(comm, "<...>");
2467 }
2468
2469 void trace_find_cmdline(int pid, char comm[])
2470 {
2471 preempt_disable();
2472 arch_spin_lock(&trace_cmdline_lock);
2473
2474 __trace_find_cmdline(pid, comm);
2475
2476 arch_spin_unlock(&trace_cmdline_lock);
2477 preempt_enable();
2478 }
2479
2480 static int *trace_find_tgid_ptr(int pid)
2481 {
2482 /*
2483 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2484 * if we observe a non-NULL tgid_map then we also observe the correct
2485 * tgid_map_max.
2486 */
2487 int *map = smp_load_acquire(&tgid_map);
2488
2489 if (unlikely(!map || pid > tgid_map_max))
2490 return NULL;
2491
2492 return &map[pid];
2493 }
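
/*
 * Illustrative sketch only (not wired up): the publishing side that the
 * smp_load_acquire() above pairs with.  The real allocation is done in
 * set_tracer_flag(); this only shows the ordering pattern, with a
 * hypothetical helper name and kcalloc() standing in for the real
 * allocator.
 */
static int __maybe_unused example_publish_tgid_map(size_t max)
{
	int *map = kcalloc(max + 1, sizeof(*map), GFP_KERNEL);

	if (!map)
		return -ENOMEM;

	/* Make the new size visible before the pointer that readers acquire */
	tgid_map_max = max;
	smp_store_release(&tgid_map, map);
	return 0;
}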
2494
2495 int trace_find_tgid(int pid)
2496 {
2497 int *ptr = trace_find_tgid_ptr(pid);
2498
2499 return ptr ? *ptr : 0;
2500 }
2501
2502 static int trace_save_tgid(struct task_struct *tsk)
2503 {
2504 int *ptr;
2505
2506 /* treat recording of idle task as a success */
2507 if (!tsk->pid)
2508 return 1;
2509
2510 ptr = trace_find_tgid_ptr(tsk->pid);
2511 if (!ptr)
2512 return 0;
2513
2514 *ptr = tsk->tgid;
2515 return 1;
2516 }
2517
2518 static bool tracing_record_taskinfo_skip(int flags)
2519 {
2520 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2521 return true;
2522 if (!__this_cpu_read(trace_taskinfo_save))
2523 return true;
2524 return false;
2525 }
2526
2527 /**
2528 * tracing_record_taskinfo - record the task info of a task
2529 *
2530 * @task: task to record
2531 * @flags: TRACE_RECORD_CMDLINE for recording comm
2532 * TRACE_RECORD_TGID for recording tgid
2533 */
2534 void tracing_record_taskinfo(struct task_struct *task, int flags)
2535 {
2536 bool done;
2537
2538 if (tracing_record_taskinfo_skip(flags))
2539 return;
2540
2541 /*
2542 * Record as much task information as possible. If some fail, continue
2543 * to try to record the others.
2544 */
2545 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2546 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2547
2548 /* If recording any information failed, retry again soon. */
2549 if (!done)
2550 return;
2551
2552 __this_cpu_write(trace_taskinfo_save, false);
2553 }
2554
2555 /**
2556 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2557 *
2558 * @prev: previous task during sched_switch
2559 * @next: next task during sched_switch
2560 * @flags: TRACE_RECORD_CMDLINE for recording comm
2561 * TRACE_RECORD_TGID for recording tgid
2562 */
2563 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2564 struct task_struct *next, int flags)
2565 {
2566 bool done;
2567
2568 if (tracing_record_taskinfo_skip(flags))
2569 return;
2570
2571 /*
2572 * Record as much task information as possible. If some fail, continue
2573 * to try to record the others.
2574 */
2575 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2576 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2577 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2578 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2579
2580 /* If recording any information failed, retry again soon. */
2581 if (!done)
2582 return;
2583
2584 __this_cpu_write(trace_taskinfo_save, false);
2585 }
2586
2587 /* Helpers to record a specific task information */
2588 void tracing_record_cmdline(struct task_struct *task)
2589 {
2590 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2591 }
2592
2593 void tracing_record_tgid(struct task_struct *task)
2594 {
2595 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2596 }
2597
2598 /*
2599 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2600 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2601 * simplifies those functions and keeps them in sync.
2602 */
2603 enum print_line_t trace_handle_return(struct trace_seq *s)
2604 {
2605 return trace_seq_has_overflowed(s) ?
2606 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2607 }
2608 EXPORT_SYMBOL_GPL(trace_handle_return);
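
/*
 * Illustrative sketch only: a typical trace_event output callback ends
 * with trace_handle_return(), so an overflowed trace_seq is reported as
 * a partial line.  The event and the text printed here are hypothetical.
 */
static enum print_line_t __maybe_unused
example_event_output(struct trace_iterator *iter, int flags,
		     struct trace_event *event)
{
	trace_seq_printf(&iter->seq, "example event\n");

	return trace_handle_return(&iter->seq);
}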
2609
2610 static unsigned short migration_disable_value(void)
2611 {
2612 #if defined(CONFIG_SMP)
2613 return current->migration_disabled;
2614 #else
2615 return 0;
2616 #endif
2617 }
2618
2619 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2620 {
2621 unsigned int trace_flags = irqs_status;
2622 unsigned int pc;
2623
2624 pc = preempt_count();
2625
2626 if (pc & NMI_MASK)
2627 trace_flags |= TRACE_FLAG_NMI;
2628 if (pc & HARDIRQ_MASK)
2629 trace_flags |= TRACE_FLAG_HARDIRQ;
2630 if (in_serving_softirq())
2631 trace_flags |= TRACE_FLAG_SOFTIRQ;
2632
2633 if (tif_need_resched())
2634 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2635 if (test_preempt_need_resched())
2636 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2637 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2638 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2639 }
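
/*
 * Illustrative sketch only: how the packed context word built above is
 * laid out.  Bits 0-3 hold the preemption depth (clamped to 15), bits
 * 4-7 the migration-disable depth (also clamped), and the TRACE_FLAG_*
 * bits start at bit 16.  The example_* helpers are hypothetical.
 */
static inline unsigned int example_ctx_preempt_depth(unsigned int trace_ctx)
{
	return trace_ctx & 0xf;
}

static inline unsigned int example_ctx_migrate_disable(unsigned int trace_ctx)
{
	return (trace_ctx >> 4) & 0xf;
}

static inline unsigned int example_ctx_flags(unsigned int trace_ctx)
{
	return trace_ctx >> 16;
}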
2640
2641 struct ring_buffer_event *
2642 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2643 int type,
2644 unsigned long len,
2645 unsigned int trace_ctx)
2646 {
2647 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2648 }
2649
2650 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2651 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2652 static int trace_buffered_event_ref;
2653
2654 /**
2655 * trace_buffered_event_enable - enable buffering events
2656 *
2657 * When events are being filtered, it is quicker to use a temporary
2658 * buffer to write the event data into if there's a likely chance
2659 * that it will not be committed. The discard of the ring buffer
2660 * is not as fast as committing, and is much slower than copying
2661 * a commit.
2662 *
2663 * When an event is to be filtered, allocate per cpu buffers to
2664 * write the event data into, and if the event is filtered and discarded
2665 * it is simply dropped, otherwise, the entire data is to be committed
2666 * in one shot.
2667 */
2668 void trace_buffered_event_enable(void)
2669 {
2670 struct ring_buffer_event *event;
2671 struct page *page;
2672 int cpu;
2673
2674 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2675
2676 if (trace_buffered_event_ref++)
2677 return;
2678
2679 for_each_tracing_cpu(cpu) {
2680 page = alloc_pages_node(cpu_to_node(cpu),
2681 GFP_KERNEL | __GFP_NORETRY, 0);
2682 /* This is just an optimization and can handle failures */
2683 if (!page) {
2684 pr_err("Failed to allocate event buffer\n");
2685 break;
2686 }
2687
2688 event = page_address(page);
2689 memset(event, 0, sizeof(*event));
2690
2691 per_cpu(trace_buffered_event, cpu) = event;
2692
2693 preempt_disable();
2694 if (cpu == smp_processor_id() &&
2695 __this_cpu_read(trace_buffered_event) !=
2696 per_cpu(trace_buffered_event, cpu))
2697 WARN_ON_ONCE(1);
2698 preempt_enable();
2699 }
2700 }
2701
2702 static void enable_trace_buffered_event(void *data)
2703 {
2704 /* Probably not needed, but do it anyway */
2705 smp_rmb();
2706 this_cpu_dec(trace_buffered_event_cnt);
2707 }
2708
2709 static void disable_trace_buffered_event(void *data)
2710 {
2711 this_cpu_inc(trace_buffered_event_cnt);
2712 }
2713
2714 /**
2715 * trace_buffered_event_disable - disable buffering events
2716 *
2717 * When a filter is removed, it is faster to not use the buffered
2718 * events, and to commit directly into the ring buffer. Free up
2719 * the temp buffers when there are no more users. This requires
2720 * special synchronization with current events.
2721 */
2722 void trace_buffered_event_disable(void)
2723 {
2724 int cpu;
2725
2726 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2727
2728 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2729 return;
2730
2731 if (--trace_buffered_event_ref)
2732 return;
2733
2734 /* For each CPU, set the buffer as used. */
2735 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2736 NULL, true);
2737
2738 /* Wait for all current users to finish */
2739 synchronize_rcu();
2740
2741 for_each_tracing_cpu(cpu) {
2742 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2743 per_cpu(trace_buffered_event, cpu) = NULL;
2744 }
2745
2746 /*
2747 * Wait for all CPUs that might have started checking whether they can use
2748 * their event buffer only after the previous synchronize_rcu() call, while
2749 * still holding a pointer they read from trace_buffered_event. They must
2750 * not see the cleared trace_buffered_event_cnt, or they could wrongly
2751 * decide to use the pointed-to buffer, which has now been freed.
2752 */
2753 synchronize_rcu();
2754
2755 /* For each CPU, relinquish the buffer */
2756 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2757 true);
2758 }
2759
2760 static struct trace_buffer *temp_buffer;
2761
2762 struct ring_buffer_event *
2763 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2764 struct trace_event_file *trace_file,
2765 int type, unsigned long len,
2766 unsigned int trace_ctx)
2767 {
2768 struct ring_buffer_event *entry;
2769 struct trace_array *tr = trace_file->tr;
2770 int val;
2771
2772 *current_rb = tr->array_buffer.buffer;
2773
2774 if (!tr->no_filter_buffering_ref &&
2775 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2776 (entry = this_cpu_read(trace_buffered_event))) {
2777 /*
2778 * Filtering is on, so try to use the per cpu buffer first.
2779 * This buffer will simulate a ring_buffer_event,
2780 * where the type_len is zero and the array[0] will
2781 * hold the full length.
2782 * (see include/linux/ring_buffer.h for details on
2783 * how the ring_buffer_event is structured).
2784 *
2785 * Using a temp buffer during filtering and copying it
2786 * on a matched filter is quicker than writing directly
2787 * into the ring buffer and then discarding it when
2788 * it doesn't match. That is because the discard
2789 * requires several atomic operations to get right.
2790 * Copying on a match and doing nothing on a failed match
2791 * is still quicker than not copying on a match but having
2792 * to discard from the ring buffer on a failed match.
2793 */
2794 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2795
2796 val = this_cpu_inc_return(trace_buffered_event_cnt);
2797
2798 /*
2799 * Preemption is disabled, but interrupts and NMIs
2800 * can still come in now. If that happens after
2801 * the above increment, then it will have to go
2802 * back to the old method of allocating the event
2803 * on the ring buffer, and if the filter fails, it
2804 * will have to call ring_buffer_discard_commit()
2805 * to remove it.
2806 *
2807 * Need to also check the unlikely case that the
2808 * length is bigger than the temp buffer size.
2809 * If that happens, then the reserve is pretty much
2810 * guaranteed to fail, as the ring buffer currently
2811 * only allows events less than a page. But that may
2812 * change in the future, so let the ring buffer reserve
2813 * handle the failure in that case.
2814 */
2815 if (val == 1 && likely(len <= max_len)) {
2816 trace_event_setup(entry, type, trace_ctx);
2817 entry->array[0] = len;
2818 return entry;
2819 }
2820 this_cpu_dec(trace_buffered_event_cnt);
2821 }
2822
2823 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2824 trace_ctx);
2825 /*
2826 * If tracing is off, but we have triggers enabled
2827 * we still need to look at the event data. Use the temp_buffer
2828 * to store the trace event for the trigger to use. It's recursive
2829 * safe and will not be recorded anywhere.
2830 */
2831 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2832 *current_rb = temp_buffer;
2833 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2834 trace_ctx);
2835 }
2836 return entry;
2837 }
2838 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2839
2840 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2841 static DEFINE_MUTEX(tracepoint_printk_mutex);
2842
2843 static void output_printk(struct trace_event_buffer *fbuffer)
2844 {
2845 struct trace_event_call *event_call;
2846 struct trace_event_file *file;
2847 struct trace_event *event;
2848 unsigned long flags;
2849 struct trace_iterator *iter = tracepoint_print_iter;
2850
2851 /* We should never get here if iter is NULL */
2852 if (WARN_ON_ONCE(!iter))
2853 return;
2854
2855 event_call = fbuffer->trace_file->event_call;
2856 if (!event_call || !event_call->event.funcs ||
2857 !event_call->event.funcs->trace)
2858 return;
2859
2860 file = fbuffer->trace_file;
2861 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2862 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2863 !filter_match_preds(file->filter, fbuffer->entry)))
2864 return;
2865
2866 event = &fbuffer->trace_file->event_call->event;
2867
2868 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2869 trace_seq_init(&iter->seq);
2870 iter->ent = fbuffer->entry;
2871 event_call->event.funcs->trace(iter, 0, event);
2872 trace_seq_putc(&iter->seq, 0);
2873 printk("%s", iter->seq.buffer);
2874
2875 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2876 }
2877
2878 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2879 void *buffer, size_t *lenp,
2880 loff_t *ppos)
2881 {
2882 int save_tracepoint_printk;
2883 int ret;
2884
2885 mutex_lock(&tracepoint_printk_mutex);
2886 save_tracepoint_printk = tracepoint_printk;
2887
2888 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2889
2890 /*
2891 * This will force exiting early, as tracepoint_printk
2892 * is always zero when tracepoint_print_iter is not allocated
2893 */
2894 if (!tracepoint_print_iter)
2895 tracepoint_printk = 0;
2896
2897 if (save_tracepoint_printk == tracepoint_printk)
2898 goto out;
2899
2900 if (tracepoint_printk)
2901 static_key_enable(&tracepoint_printk_key.key);
2902 else
2903 static_key_disable(&tracepoint_printk_key.key);
2904
2905 out:
2906 mutex_unlock(&tracepoint_printk_mutex);
2907
2908 return ret;
2909 }
2910
2911 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2912 {
2913 enum event_trigger_type tt = ETT_NONE;
2914 struct trace_event_file *file = fbuffer->trace_file;
2915
2916 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2917 fbuffer->entry, &tt))
2918 goto discard;
2919
2920 if (static_key_false(&tracepoint_printk_key.key))
2921 output_printk(fbuffer);
2922
2923 if (static_branch_unlikely(&trace_event_exports_enabled))
2924 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2925
2926 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2927 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2928
2929 discard:
2930 if (tt)
2931 event_triggers_post_call(file, tt);
2932
2933 }
2934 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2935
2936 /*
2937 * Skip 3:
2938 *
2939 * trace_buffer_unlock_commit_regs()
2940 * trace_event_buffer_commit()
2941 * trace_event_raw_event_xxx()
2942 */
2943 # define STACK_SKIP 3
2944
2945 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2946 struct trace_buffer *buffer,
2947 struct ring_buffer_event *event,
2948 unsigned int trace_ctx,
2949 struct pt_regs *regs)
2950 {
2951 __buffer_unlock_commit(buffer, event);
2952
2953 /*
2954 * If regs is not set, then skip the necessary functions.
2955 * Note, we can still get here via blktrace, wakeup tracer
2956 * and mmiotrace, but that's ok if they lose a function or
2957 * two. They are not that meaningful.
2958 */
2959 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2960 ftrace_trace_userstack(tr, buffer, trace_ctx);
2961 }
2962
2963 /*
2964 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2965 */
2966 void
2967 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2968 struct ring_buffer_event *event)
2969 {
2970 __buffer_unlock_commit(buffer, event);
2971 }
2972
2973 void
2974 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2975 parent_ip, unsigned int trace_ctx)
2976 {
2977 struct trace_event_call *call = &event_function;
2978 struct trace_buffer *buffer = tr->array_buffer.buffer;
2979 struct ring_buffer_event *event;
2980 struct ftrace_entry *entry;
2981
2982 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2983 trace_ctx);
2984 if (!event)
2985 return;
2986 entry = ring_buffer_event_data(event);
2987 entry->ip = ip;
2988 entry->parent_ip = parent_ip;
2989
2990 if (!call_filter_check_discard(call, entry, buffer, event)) {
2991 if (static_branch_unlikely(&trace_function_exports_enabled))
2992 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2993 __buffer_unlock_commit(buffer, event);
2994 }
2995 }
2996
2997 #ifdef CONFIG_STACKTRACE
2998
2999 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3000 #define FTRACE_KSTACK_NESTING 4
3001
3002 #define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3003
3004 struct ftrace_stack {
3005 unsigned long calls[FTRACE_KSTACK_ENTRIES];
3006 };
3007
3008
3009 struct ftrace_stacks {
3010 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
3011 };
3012
3013 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3014 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3015
3016 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3017 unsigned int trace_ctx,
3018 int skip, struct pt_regs *regs)
3019 {
3020 struct trace_event_call *call = &event_kernel_stack;
3021 struct ring_buffer_event *event;
3022 unsigned int size, nr_entries;
3023 struct ftrace_stack *fstack;
3024 struct stack_entry *entry;
3025 int stackidx;
3026
3027 /*
3028 * Add one, for this function and the call to stack_trace_save()
3029 * If regs is set, then these functions will not be in the way.
3030 */
3031 #ifndef CONFIG_UNWINDER_ORC
3032 if (!regs)
3033 skip++;
3034 #endif
3035
3036 preempt_disable_notrace();
3037
3038 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3039
3040 /* This should never happen. If it does, yell once and skip */
3041 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3042 goto out;
3043
3044 /*
3045 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3046 * interrupt will either see the value pre increment or post
3047 * increment. If the interrupt happens pre increment it will have
3048 * restored the counter when it returns. We just need a barrier to
3049 * keep gcc from moving things around.
3050 */
3051 barrier();
3052
3053 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3054 size = ARRAY_SIZE(fstack->calls);
3055
3056 if (regs) {
3057 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3058 size, skip);
3059 } else {
3060 nr_entries = stack_trace_save(fstack->calls, size, skip);
3061 }
3062
3063 size = nr_entries * sizeof(unsigned long);
3064 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3065 (sizeof(*entry) - sizeof(entry->caller)) + size,
3066 trace_ctx);
3067 if (!event)
3068 goto out;
3069 entry = ring_buffer_event_data(event);
3070
3071 memcpy(&entry->caller, fstack->calls, size);
3072 entry->size = nr_entries;
3073
3074 if (!call_filter_check_discard(call, entry, buffer, event))
3075 __buffer_unlock_commit(buffer, event);
3076
3077 out:
3078 /* Again, don't let gcc optimize things here */
3079 barrier();
3080 __this_cpu_dec(ftrace_stack_reserve);
3081 preempt_enable_notrace();
3082
3083 }
3084
3085 static inline void ftrace_trace_stack(struct trace_array *tr,
3086 struct trace_buffer *buffer,
3087 unsigned int trace_ctx,
3088 int skip, struct pt_regs *regs)
3089 {
3090 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3091 return;
3092
3093 __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3094 }
3095
3096 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3097 int skip)
3098 {
3099 struct trace_buffer *buffer = tr->array_buffer.buffer;
3100
3101 if (rcu_is_watching()) {
3102 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3103 return;
3104 }
3105
3106 /*
3107 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3108 * but if the above rcu_is_watching() failed, then the NMI
3109 * triggered someplace critical, and rcu_irq_enter() should
3110 * not be called from NMI.
3111 */
3112 if (unlikely(in_nmi()))
3113 return;
3114
3115 rcu_irq_enter_irqson();
3116 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3117 rcu_irq_exit_irqson();
3118 }
3119
3120 /**
3121 * trace_dump_stack - record a stack back trace in the trace buffer
3122 * @skip: Number of functions to skip (helper handlers)
3123 */
3124 void trace_dump_stack(int skip)
3125 {
3126 if (tracing_disabled || tracing_selftest_running)
3127 return;
3128
3129 #ifndef CONFIG_UNWINDER_ORC
3130 /* Skip 1 to skip this function. */
3131 skip++;
3132 #endif
3133 __ftrace_trace_stack(global_trace.array_buffer.buffer,
3134 tracing_gen_ctx(), skip, NULL);
3135 }
3136 EXPORT_SYMBOL_GPL(trace_dump_stack);
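
/*
 * Illustrative sketch only: trace_dump_stack() is meant for ad-hoc
 * debugging from arbitrary kernel code, to see how a path was reached.
 * The wrapper below is hypothetical.
 */
static void __maybe_unused example_dump_how_we_got_here(void)
{
	/* Record the current kernel stack into the top-level trace buffer */
	trace_dump_stack(0);
}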
3137
3138 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3139 static DEFINE_PER_CPU(int, user_stack_count);
3140
3141 static void
3142 ftrace_trace_userstack(struct trace_array *tr,
3143 struct trace_buffer *buffer, unsigned int trace_ctx)
3144 {
3145 struct trace_event_call *call = &event_user_stack;
3146 struct ring_buffer_event *event;
3147 struct userstack_entry *entry;
3148
3149 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3150 return;
3151
3152 /*
3153 * NMIs can not handle page faults, even with fix ups.
3154 * The save user stack can (and often does) fault.
3155 */
3156 if (unlikely(in_nmi()))
3157 return;
3158
3159 /*
3160 * prevent recursion, since the user stack tracing may
3161 * trigger other kernel events.
3162 */
3163 preempt_disable();
3164 if (__this_cpu_read(user_stack_count))
3165 goto out;
3166
3167 __this_cpu_inc(user_stack_count);
3168
3169 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3170 sizeof(*entry), trace_ctx);
3171 if (!event)
3172 goto out_drop_count;
3173 entry = ring_buffer_event_data(event);
3174
3175 entry->tgid = current->tgid;
3176 memset(&entry->caller, 0, sizeof(entry->caller));
3177
3178 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3179 if (!call_filter_check_discard(call, entry, buffer, event))
3180 __buffer_unlock_commit(buffer, event);
3181
3182 out_drop_count:
3183 __this_cpu_dec(user_stack_count);
3184 out:
3185 preempt_enable();
3186 }
3187 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3188 static void ftrace_trace_userstack(struct trace_array *tr,
3189 struct trace_buffer *buffer,
3190 unsigned int trace_ctx)
3191 {
3192 }
3193 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3194
3195 #endif /* CONFIG_STACKTRACE */
3196
3197 static inline void
3198 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3199 unsigned long long delta)
3200 {
3201 entry->bottom_delta_ts = delta & U32_MAX;
3202 entry->top_delta_ts = (delta >> 32);
3203 }
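
/*
 * Illustrative sketch only: the read side reassembles the 64-bit delta
 * from the two 32-bit halves stored above.  The helper name is
 * hypothetical.
 */
static inline u64 example_func_repeats_delta_ts(struct func_repeats_entry *entry)
{
	return ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
}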
3204
3205 void trace_last_func_repeats(struct trace_array *tr,
3206 struct trace_func_repeats *last_info,
3207 unsigned int trace_ctx)
3208 {
3209 struct trace_buffer *buffer = tr->array_buffer.buffer;
3210 struct func_repeats_entry *entry;
3211 struct ring_buffer_event *event;
3212 u64 delta;
3213
3214 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3215 sizeof(*entry), trace_ctx);
3216 if (!event)
3217 return;
3218
3219 delta = ring_buffer_event_time_stamp(buffer, event) -
3220 last_info->ts_last_call;
3221
3222 entry = ring_buffer_event_data(event);
3223 entry->ip = last_info->ip;
3224 entry->parent_ip = last_info->parent_ip;
3225 entry->count = last_info->count;
3226 func_repeats_set_delta_ts(entry, delta);
3227
3228 __buffer_unlock_commit(buffer, event);
3229 }
3230
3231 /* created for use with alloc_percpu */
3232 struct trace_buffer_struct {
3233 int nesting;
3234 char buffer[4][TRACE_BUF_SIZE];
3235 };
3236
3237 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3238
3239 /*
3240 * This allows for lockless recording. If we're nested too deeply, then
3241 * this returns NULL.
3242 */
3243 static char *get_trace_buf(void)
3244 {
3245 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3246
3247 if (!trace_percpu_buffer || buffer->nesting >= 4)
3248 return NULL;
3249
3250 buffer->nesting++;
3251
3252 /* Interrupts must see nesting incremented before we use the buffer */
3253 barrier();
3254 return &buffer->buffer[buffer->nesting - 1][0];
3255 }
3256
3257 static void put_trace_buf(void)
3258 {
3259 /* Don't let the decrement of nesting leak before this */
3260 barrier();
3261 this_cpu_dec(trace_percpu_buffer->nesting);
3262 }
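
/*
 * Illustrative sketch only: every successful get_trace_buf() must be
 * paired with put_trace_buf(), with preemption disabled across the whole
 * window, which is how trace_vbprintk() and __trace_array_vprintk()
 * below use it.  The helper and its use of strscpy() are hypothetical.
 */
static int __maybe_unused example_use_trace_buf(const char *msg)
{
	char *buf;
	int len = 0;

	preempt_disable_notrace();
	buf = get_trace_buf();
	if (buf) {
		len = strscpy(buf, msg, TRACE_BUF_SIZE);
		/* ... hand the staged string to the ring buffer here ... */
		put_trace_buf();
	}
	preempt_enable_notrace();

	return len;
}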
3263
3264 static int alloc_percpu_trace_buffer(void)
3265 {
3266 struct trace_buffer_struct __percpu *buffers;
3267
3268 if (trace_percpu_buffer)
3269 return 0;
3270
3271 buffers = alloc_percpu(struct trace_buffer_struct);
3272 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3273 return -ENOMEM;
3274
3275 trace_percpu_buffer = buffers;
3276 return 0;
3277 }
3278
3279 static int buffers_allocated;
3280
3281 void trace_printk_init_buffers(void)
3282 {
3283 if (buffers_allocated)
3284 return;
3285
3286 if (alloc_percpu_trace_buffer())
3287 return;
3288
3289 /* trace_printk() is for debug use only. Don't use it in production. */
3290
3291 pr_warn("\n");
3292 pr_warn("**********************************************************\n");
3293 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3294 pr_warn("** **\n");
3295 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3296 pr_warn("** **\n");
3297 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3298 pr_warn("** unsafe for production use. **\n");
3299 pr_warn("** **\n");
3300 pr_warn("** If you see this message and you are not debugging **\n");
3301 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3302 pr_warn("** **\n");
3303 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3304 pr_warn("**********************************************************\n");
3305
3306 /* Expand the buffers to set size */
3307 tracing_update_buffers();
3308
3309 buffers_allocated = 1;
3310
3311 /*
3312 * trace_printk_init_buffers() can be called by modules.
3313 * If that happens, then we need to start cmdline recording
3314 * directly here. If the global_trace.buffer is already
3315 * allocated here, then this was called by module code.
3316 */
3317 if (global_trace.array_buffer.buffer)
3318 tracing_start_cmdline_record();
3319 }
3320 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
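
/*
 * Illustrative sketch only: what a trace_printk() debugging call looks
 * like from other kernel code.  Using it anywhere in a kernel build is
 * what leads to the allocation and the banner above.  The function and
 * message are hypothetical.
 */
static void __maybe_unused example_trace_printk_user(int cpu, u64 delta_ns)
{
	/* Goes into the top-level ring buffer, readable via the "trace" file */
	trace_printk("cpu %d observed a %llu ns delta\n", cpu, delta_ns);
}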
3321
3322 void trace_printk_start_comm(void)
3323 {
3324 /* Start tracing comms if trace printk is set */
3325 if (!buffers_allocated)
3326 return;
3327 tracing_start_cmdline_record();
3328 }
3329
3330 static void trace_printk_start_stop_comm(int enabled)
3331 {
3332 if (!buffers_allocated)
3333 return;
3334
3335 if (enabled)
3336 tracing_start_cmdline_record();
3337 else
3338 tracing_stop_cmdline_record();
3339 }
3340
3341 /**
3342 * trace_vbprintk - write binary msg to tracing buffer
3343 * @ip: The address of the caller
3344 * @fmt: The string format to write to the buffer
3345 * @args: Arguments for @fmt
3346 */
3347 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3348 {
3349 struct trace_event_call *call = &event_bprint;
3350 struct ring_buffer_event *event;
3351 struct trace_buffer *buffer;
3352 struct trace_array *tr = &global_trace;
3353 struct bprint_entry *entry;
3354 unsigned int trace_ctx;
3355 char *tbuffer;
3356 int len = 0, size;
3357
3358 if (unlikely(tracing_selftest_running || tracing_disabled))
3359 return 0;
3360
3361 /* Don't pollute graph traces with trace_vprintk internals */
3362 pause_graph_tracing();
3363
3364 trace_ctx = tracing_gen_ctx();
3365 preempt_disable_notrace();
3366
3367 tbuffer = get_trace_buf();
3368 if (!tbuffer) {
3369 len = 0;
3370 goto out_nobuffer;
3371 }
3372
3373 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3374
3375 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3376 goto out_put;
3377
3378 size = sizeof(*entry) + sizeof(u32) * len;
3379 buffer = tr->array_buffer.buffer;
3380 ring_buffer_nest_start(buffer);
3381 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3382 trace_ctx);
3383 if (!event)
3384 goto out;
3385 entry = ring_buffer_event_data(event);
3386 entry->ip = ip;
3387 entry->fmt = fmt;
3388
3389 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3390 if (!call_filter_check_discard(call, entry, buffer, event)) {
3391 __buffer_unlock_commit(buffer, event);
3392 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3393 }
3394
3395 out:
3396 ring_buffer_nest_end(buffer);
3397 out_put:
3398 put_trace_buf();
3399
3400 out_nobuffer:
3401 preempt_enable_notrace();
3402 unpause_graph_tracing();
3403
3404 return len;
3405 }
3406 EXPORT_SYMBOL_GPL(trace_vbprintk);
3407
3408 __printf(3, 0)
3409 static int
3410 __trace_array_vprintk(struct trace_buffer *buffer,
3411 unsigned long ip, const char *fmt, va_list args)
3412 {
3413 struct trace_event_call *call = &event_print;
3414 struct ring_buffer_event *event;
3415 int len = 0, size;
3416 struct print_entry *entry;
3417 unsigned int trace_ctx;
3418 char *tbuffer;
3419
3420 if (tracing_disabled || tracing_selftest_running)
3421 return 0;
3422
3423 /* Don't pollute graph traces with trace_vprintk internals */
3424 pause_graph_tracing();
3425
3426 trace_ctx = tracing_gen_ctx();
3427 preempt_disable_notrace();
3428
3429
3430 tbuffer = get_trace_buf();
3431 if (!tbuffer) {
3432 len = 0;
3433 goto out_nobuffer;
3434 }
3435
3436 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3437
3438 size = sizeof(*entry) + len + 1;
3439 ring_buffer_nest_start(buffer);
3440 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3441 trace_ctx);
3442 if (!event)
3443 goto out;
3444 entry = ring_buffer_event_data(event);
3445 entry->ip = ip;
3446
3447 memcpy(&entry->buf, tbuffer, len + 1);
3448 if (!call_filter_check_discard(call, entry, buffer, event)) {
3449 __buffer_unlock_commit(buffer, event);
3450 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3451 }
3452
3453 out:
3454 ring_buffer_nest_end(buffer);
3455 put_trace_buf();
3456
3457 out_nobuffer:
3458 preempt_enable_notrace();
3459 unpause_graph_tracing();
3460
3461 return len;
3462 }
3463
3464 __printf(3, 0)
3465 int trace_array_vprintk(struct trace_array *tr,
3466 unsigned long ip, const char *fmt, va_list args)
3467 {
3468 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3469 }
3470
3471 /**
3472 * trace_array_printk - Print a message to a specific instance
3473 * @tr: The instance trace_array descriptor
3474 * @ip: The instruction pointer that this is called from.
3475 * @fmt: The format to print (printf format)
3476 *
3477 * If a subsystem sets up its own instance, they have the right to
3478 * printk strings into their tracing instance buffer using this
3479 * function. Note, this function will not write into the top level
3480 * buffer (use trace_printk() for that), as writing into the top level
3481 * buffer should only have events that can be individually disabled.
3482 * trace_printk() is only for debugging a kernel, and should never
3483 * be incorporated into normal (production) code.
3484 *
3485 * trace_array_printk() can be used, as it will not add noise to the
3486 * top level tracing buffer.
3487 *
3488 * Note, trace_array_init_printk() must be called on @tr before this
3489 * can be used.
3490 */
3491 __printf(3, 0)
3492 int trace_array_printk(struct trace_array *tr,
3493 unsigned long ip, const char *fmt, ...)
3494 {
3495 int ret;
3496 va_list ap;
3497
3498 if (!tr)
3499 return -ENOENT;
3500
3501 /* This is only allowed for created instances */
3502 if (tr == &global_trace)
3503 return 0;
3504
3505 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3506 return 0;
3507
3508 va_start(ap, fmt);
3509 ret = trace_array_vprintk(tr, ip, fmt, ap);
3510 va_end(ap);
3511 return ret;
3512 }
3513 EXPORT_SYMBOL_GPL(trace_array_printk);
3514
3515 /**
3516 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3517 * @tr: The trace array to initialize the buffers for
3518 *
3519 * As trace_array_printk() only writes into instances, they are OK to
3520 * have in the kernel (unlike trace_printk()). This needs to be called
3521 * before trace_array_printk() can be used on a trace_array.
3522 */
3523 int trace_array_init_printk(struct trace_array *tr)
3524 {
3525 if (!tr)
3526 return -ENOENT;
3527
3528 /* This is only allowed for created instances */
3529 if (tr == &global_trace)
3530 return -EINVAL;
3531
3532 return alloc_percpu_trace_buffer();
3533 }
3534 EXPORT_SYMBOL_GPL(trace_array_init_printk);
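
/*
 * Illustrative sketch only: the sequence a subsystem would follow to log
 * into its own instance rather than the top-level buffer.  The instance
 * name is hypothetical and error handling is simplified;
 * trace_array_get_by_name() creates the instance if it does not exist.
 */
static int __maybe_unused example_instance_logging(void)
{
	struct trace_array *tr;

	tr = trace_array_get_by_name("example");
	if (!tr)
		return -ENOMEM;

	if (trace_array_init_printk(tr))
		return -ENOMEM;

	trace_array_printk(tr, _THIS_IP_, "hello from the example instance\n");

	return 0;
}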
3535
3536 __printf(3, 4)
3537 int trace_array_printk_buf(struct trace_buffer *buffer,
3538 unsigned long ip, const char *fmt, ...)
3539 {
3540 int ret;
3541 va_list ap;
3542
3543 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3544 return 0;
3545
3546 va_start(ap, fmt);
3547 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3548 va_end(ap);
3549 return ret;
3550 }
3551
3552 __printf(2, 0)
3553 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3554 {
3555 return trace_array_vprintk(&global_trace, ip, fmt, args);
3556 }
3557 EXPORT_SYMBOL_GPL(trace_vprintk);
3558
3559 static void trace_iterator_increment(struct trace_iterator *iter)
3560 {
3561 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3562
3563 iter->idx++;
3564 if (buf_iter)
3565 ring_buffer_iter_advance(buf_iter);
3566 }
3567
3568 static struct trace_entry *
3569 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3570 unsigned long *lost_events)
3571 {
3572 struct ring_buffer_event *event;
3573 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3574
3575 if (buf_iter) {
3576 event = ring_buffer_iter_peek(buf_iter, ts);
3577 if (lost_events)
3578 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3579 (unsigned long)-1 : 0;
3580 } else {
3581 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3582 lost_events);
3583 }
3584
3585 if (event) {
3586 iter->ent_size = ring_buffer_event_length(event);
3587 return ring_buffer_event_data(event);
3588 }
3589 iter->ent_size = 0;
3590 return NULL;
3591 }
3592
3593 static struct trace_entry *
3594 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3595 unsigned long *missing_events, u64 *ent_ts)
3596 {
3597 struct trace_buffer *buffer = iter->array_buffer->buffer;
3598 struct trace_entry *ent, *next = NULL;
3599 unsigned long lost_events = 0, next_lost = 0;
3600 int cpu_file = iter->cpu_file;
3601 u64 next_ts = 0, ts;
3602 int next_cpu = -1;
3603 int next_size = 0;
3604 int cpu;
3605
3606 /*
3607 * If we are in a per_cpu trace file, don't bother iterating over
3608 * all the CPUs; peek directly at that CPU.
3609 */
3610 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3611 if (ring_buffer_empty_cpu(buffer, cpu_file))
3612 return NULL;
3613 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3614 if (ent_cpu)
3615 *ent_cpu = cpu_file;
3616
3617 return ent;
3618 }
3619
3620 for_each_tracing_cpu(cpu) {
3621
3622 if (ring_buffer_empty_cpu(buffer, cpu))
3623 continue;
3624
3625 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3626
3627 /*
3628 * Pick the entry with the smallest timestamp:
3629 */
3630 if (ent && (!next || ts < next_ts)) {
3631 next = ent;
3632 next_cpu = cpu;
3633 next_ts = ts;
3634 next_lost = lost_events;
3635 next_size = iter->ent_size;
3636 }
3637 }
3638
3639 iter->ent_size = next_size;
3640
3641 if (ent_cpu)
3642 *ent_cpu = next_cpu;
3643
3644 if (ent_ts)
3645 *ent_ts = next_ts;
3646
3647 if (missing_events)
3648 *missing_events = next_lost;
3649
3650 return next;
3651 }
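
/*
 * Worked example for __find_next_entry() above (illustrative numbers):
 * if the next entries waiting in the per-CPU buffers carry timestamps
 *
 *	CPU0: 1050   CPU1: 990   CPU2: (empty)
 *
 * the loop skips CPU2, compares 1050 against 990 and returns CPU1's
 * entry with *ent_cpu = 1 and *ent_ts = 990. Repeated calls therefore
 * produce a single stream ordered by timestamp across all CPUs.
 */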
3652
3653 #define STATIC_FMT_BUF_SIZE 128
3654 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3655
3656 static char *trace_iter_expand_format(struct trace_iterator *iter)
3657 {
3658 char *tmp;
3659
3660 /*
3661 * iter->tr is NULL when used with tp_printk, in which case
3662 * this can be called where it is not safe to call krealloc().
3663 */
3664 if (!iter->tr || iter->fmt == static_fmt_buf)
3665 return NULL;
3666
3667 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3668 GFP_KERNEL);
3669 if (tmp) {
3670 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3671 iter->fmt = tmp;
3672 }
3673
3674 return tmp;
3675 }
3676
3677 /* Returns true if the string is safe to dereference from an event */
3678 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3679 bool star, int len)
3680 {
3681 unsigned long addr = (unsigned long)str;
3682 struct trace_event *trace_event;
3683 struct trace_event_call *event;
3684
3685 /* Ignore strings with no length */
3686 if (star && !len)
3687 return true;
3688
3689 /* OK if part of the event data */
3690 if ((addr >= (unsigned long)iter->ent) &&
3691 (addr < (unsigned long)iter->ent + iter->ent_size))
3692 return true;
3693
3694 /* OK if part of the temp seq buffer */
3695 if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3696 (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3697 return true;
3698
3699 /* Core rodata can not be freed */
3700 if (is_kernel_rodata(addr))
3701 return true;
3702
3703 if (trace_is_tracepoint_string(str))
3704 return true;
3705
3706 /*
3707 * Now this could be a module event, referencing core module
3708 * data, which is OK.
3709 */
3710 if (!iter->ent)
3711 return false;
3712
3713 trace_event = ftrace_find_event(iter->ent->type);
3714 if (!trace_event)
3715 return false;
3716
3717 event = container_of(trace_event, struct trace_event_call, event);
3718 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3719 return false;
3720
3721 /* Would rather have rodata, but this will suffice */
3722 if (within_module_core(addr, event->module))
3723 return true;
3724
3725 return false;
3726 }
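
/*
 * Example of what trace_safe_str() above accepts (illustrative): a
 * string recorded with __string()/__assign_str() lives inside the
 * event payload, so its address falls within [iter->ent, iter->ent +
 * iter->ent_size) and passes the first check. A "%s" that only saved a
 * pointer to a kmalloc()ed buffer freed after the event fired matches
 * none of the checks and is flagged by trace_check_vprintf() below.
 */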
3727
3728 static const char *show_buffer(struct trace_seq *s)
3729 {
3730 struct seq_buf *seq = &s->seq;
3731
3732 seq_buf_terminate(seq);
3733
3734 return seq->buffer;
3735 }
3736
3737 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3738
3739 static int test_can_verify_check(const char *fmt, ...)
3740 {
3741 char buf[16];
3742 va_list ap;
3743 int ret;
3744
3745 /*
3746 * The verifier depends on vsnprintf() modifying the va_list that is
3747 * passed to it, i.e. on the va_list being passed by reference. Some
3748 * architectures (like x86_32) pass it by value, which means that
3749 * vsnprintf() does not modify the caller's va_list, and the verifier
3750 * would then need to be able to understand all the values that
3751 * vsnprintf can use. If it is passed by value, then the verifier
3752 * is disabled.
3753 */
3754 va_start(ap, fmt);
3755 vsnprintf(buf, 16, "%d", ap);
3756 ret = va_arg(ap, int);
3757 va_end(ap);
3758
3759 return ret;
3760 }
3761
3762 static void test_can_verify(void)
3763 {
3764 if (!test_can_verify_check("%d %d", 0, 1)) {
3765 pr_info("trace event string verifier disabled\n");
3766 static_branch_inc(&trace_no_verify);
3767 }
3768 }
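
/*
 * Reading the check above (illustrative): vsnprintf() consumes one int
 * for the single "%d" in the format. On architectures that pass the
 * va_list by reference, the caller's ap has advanced, the following
 * va_arg() returns the second argument (1) and the verifier stays
 * enabled. Where the va_list is passed by value, va_arg() sees the
 * first argument (0) again and trace_no_verify is switched on.
 */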
3769
3770 /**
3771 * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3772 * @iter: The iterator that holds the seq buffer and the event being printed
3773 * @fmt: The format used to print the event
3774 * @ap: The va_list holding the data to print from @fmt.
3775 *
3776 * This writes the data into the @iter->seq buffer using the data from
3777 * @fmt and @ap. If the format has a %s, then the source of the string
3778 * is examined to make sure it is safe to print, otherwise it will
3779 * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3780 * pointer.
3781 */
3782 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3783 va_list ap)
3784 {
3785 const char *p = fmt;
3786 const char *str;
3787 int i, j;
3788
3789 if (WARN_ON_ONCE(!fmt))
3790 return;
3791
3792 if (static_branch_unlikely(&trace_no_verify))
3793 goto print;
3794
3795 /* Don't bother checking when doing a ftrace_dump() */
3796 if (iter->fmt == static_fmt_buf)
3797 goto print;
3798
3799 while (*p) {
3800 bool star = false;
3801 int len = 0;
3802
3803 j = 0;
3804
3805 /* We only care about %s and variants */
3806 for (i = 0; p[i]; i++) {
3807 if (i + 1 >= iter->fmt_size) {
3808 /*
3809 * If we can't expand the copy buffer,
3810 * just print it.
3811 */
3812 if (!trace_iter_expand_format(iter))
3813 goto print;
3814 }
3815
3816 if (p[i] == '\\' && p[i+1]) {
3817 i++;
3818 continue;
3819 }
3820 if (p[i] == '%') {
3821 /* Need to test cases like %08.*s */
3822 for (j = 1; p[i+j]; j++) {
3823 if (isdigit(p[i+j]) ||
3824 p[i+j] == '.')
3825 continue;
3826 if (p[i+j] == '*') {
3827 star = true;
3828 continue;
3829 }
3830 break;
3831 }
3832 if (p[i+j] == 's')
3833 break;
3834 star = false;
3835 }
3836 j = 0;
3837 }
3838 /* If no %s found then just print normally */
3839 if (!p[i])
3840 break;
3841
3842 /* Copy up to the %s, and print that */
3843 strncpy(iter->fmt, p, i);
3844 iter->fmt[i] = '\0';
3845 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3846
3847 /*
3848 * If iter->seq is full, the above call no longer guarantees
3849 * that ap is in sync with fmt processing, and further calls
3850 * to va_arg() can return wrong positional arguments.
3851 *
3852 * Ensure that ap is no longer used in this case.
3853 */
3854 if (iter->seq.full) {
3855 p = "";
3856 break;
3857 }
3858
3859 if (star)
3860 len = va_arg(ap, int);
3861
3862 /* The ap now points to the string data of the %s */
3863 str = va_arg(ap, const char *);
3864
3865 /*
3866 * If you hit this warning, it is likely that the
3867 * trace event in question used %s on a string that
3868 * was saved at the time of the event, but may not be
3869 * around when the trace is read. Use __string(),
3870 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3871 * instead. See samples/trace_events/trace-events-sample.h
3872 * for reference.
3873 */
3874 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3875 "fmt: '%s' current_buffer: '%s'",
3876 fmt, show_buffer(&iter->seq))) {
3877 int ret;
3878
3879 /* Try to safely read the string */
3880 if (star) {
3881 if (len + 1 > iter->fmt_size)
3882 len = iter->fmt_size - 1;
3883 if (len < 0)
3884 len = 0;
3885 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3886 iter->fmt[len] = 0;
3887 star = false;
3888 } else {
3889 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3890 iter->fmt_size);
3891 }
3892 if (ret < 0)
3893 trace_seq_printf(&iter->seq, "(0x%px)", str);
3894 else
3895 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3896 str, iter->fmt);
3897 str = "[UNSAFE-MEMORY]";
3898 strcpy(iter->fmt, "%s");
3899 } else {
3900 strncpy(iter->fmt, p + i, j + 1);
3901 iter->fmt[j+1] = '\0';
3902 }
3903 if (star)
3904 trace_seq_printf(&iter->seq, iter->fmt, len, str);
3905 else
3906 trace_seq_printf(&iter->seq, iter->fmt, str);
3907
3908 p += i + j + 1;
3909 }
3910 print:
3911 if (*p)
3912 trace_seq_vprintf(&iter->seq, p, ap);
3913 }
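
/*
 * Example of the flow above (illustrative): for fmt = "comm=%s pid=%d",
 * the text before "%s" is printed first, then the "%s" conversion is
 * printed with its string argument once it passes trace_safe_str(). If
 * the pointer cannot be validated, the raw address is printed and the
 * string itself is replaced with "[UNSAFE-MEMORY]". The remaining
 * " pid=%d" is handled by the final trace_seq_vprintf() at the print
 * label.
 */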
3914
3915 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3916 {
3917 const char *p, *new_fmt;
3918 char *q;
3919
3920 if (WARN_ON_ONCE(!fmt))
3921 return fmt;
3922
3923 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3924 return fmt;
3925
3926 p = fmt;
3927 new_fmt = q = iter->fmt;
3928 while (*p) {
3929 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3930 if (!trace_iter_expand_format(iter))
3931 return fmt;
3932
3933 q += iter->fmt - new_fmt;
3934 new_fmt = iter->fmt;
3935 }
3936
3937 *q++ = *p++;
3938
3939 /* Replace %p with %px */
3940 if (p[-1] == '%') {
3941 if (p[0] == '%') {
3942 *q++ = *p++;
3943 } else if (p[0] == 'p' && !isalnum(p[1])) {
3944 *q++ = *p++;
3945 *q++ = 'x';
3946 }
3947 }
3948 }
3949 *q = '\0';
3950
3951 return new_fmt;
3952 }
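
/*
 * Example of the rewrite above (illustrative): with the hash-ptr trace
 * option cleared, "addr=%p name=%pS" becomes "addr=%px name=%pS". Only
 * a bare "%p" (next character not alphanumeric) is turned into "%px";
 * extended specifiers such as "%pS" and a literal "%%" are left alone.
 */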
3953
3954 #define STATIC_TEMP_BUF_SIZE 128
3955 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3956
3957 /* Find the next real entry, without updating the iterator itself */
3958 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3959 int *ent_cpu, u64 *ent_ts)
3960 {
3961 /* __find_next_entry will reset ent_size */
3962 int ent_size = iter->ent_size;
3963 struct trace_entry *entry;
3964
3965 /*
3966 * If called from ftrace_dump(), then the iter->temp buffer
3967 * will be the static_temp_buf and not created from kmalloc.
3968 * If the entry size is greater than the buffer, we cannot
3969 * save it. Just return NULL in that case. This is only
3970 * used to add markers when two consecutive events' time
3971 * stamps have a large delta. See trace_print_lat_context().
3972 */
3973 if (iter->temp == static_temp_buf &&
3974 STATIC_TEMP_BUF_SIZE < ent_size)
3975 return NULL;
3976
3977 /*
3978 * The __find_next_entry() may call peek_next_entry(), which may
3979 * call ring_buffer_peek() that may make the contents of iter->ent
3980 * undefined. Need to copy iter->ent now.
3981 */
3982 if (iter->ent && iter->ent != iter->temp) {
3983 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3984 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3985 void *temp;
3986 temp = kmalloc(iter->ent_size, GFP_KERNEL);
3987 if (!temp)
3988 return NULL;
3989 kfree(iter->temp);
3990 iter->temp = temp;
3991 iter->temp_size = iter->ent_size;
3992 }
3993 memcpy(iter->temp, iter->ent, iter->ent_size);
3994 iter->ent = iter->temp;
3995 }
3996 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3997 /* Put back the original ent_size */
3998 iter->ent_size = ent_size;
3999
4000 return entry;
4001 }
4002
4003 /* Find the next real entry, and increment the iterator to the next entry */
4004 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4005 {
4006 iter->ent = __find_next_entry(iter, &iter->cpu,
4007 &iter->lost_events, &iter->ts);
4008
4009 if (iter->ent)
4010 trace_iterator_increment(iter);
4011
4012 return iter->ent ? iter : NULL;
4013 }
4014
4015 static void trace_consume(struct trace_iterator *iter)
4016 {
4017 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4018 &iter->lost_events);
4019 }
4020
4021 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4022 {
4023 struct trace_iterator *iter = m->private;
4024 int i = (int)*pos;
4025 void *ent;
4026
4027 WARN_ON_ONCE(iter->leftover);
4028
4029 (*pos)++;
4030
4031 /* can't go backwards */
4032 if (iter->idx > i)
4033 return NULL;
4034
4035 if (iter->idx < 0)
4036 ent = trace_find_next_entry_inc(iter);
4037 else
4038 ent = iter;
4039
4040 while (ent && iter->idx < i)
4041 ent = trace_find_next_entry_inc(iter);
4042
4043 iter->pos = *pos;
4044
4045 return ent;
4046 }
4047
4048 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4049 {
4050 struct ring_buffer_iter *buf_iter;
4051 unsigned long entries = 0;
4052 u64 ts;
4053
4054 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4055
4056 buf_iter = trace_buffer_iter(iter, cpu);
4057 if (!buf_iter)
4058 return;
4059
4060 ring_buffer_iter_reset(buf_iter);
4061
4062 /*
4063 * With the max latency tracers it is possible that a reset
4064 * never took place on a CPU. This is evident when the
4065 * timestamp is before the start of the buffer.
4066 */
4067 while (ring_buffer_iter_peek(buf_iter, &ts)) {
4068 if (ts >= iter->array_buffer->time_start)
4069 break;
4070 entries++;
4071 ring_buffer_iter_advance(buf_iter);
4072 }
4073
4074 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4075 }
4076
4077 /*
4078 * The current tracer is copied to avoid using a global lock
4079 * all around.
4080 */
4081 static void *s_start(struct seq_file *m, loff_t *pos)
4082 {
4083 struct trace_iterator *iter = m->private;
4084 struct trace_array *tr = iter->tr;
4085 int cpu_file = iter->cpu_file;
4086 void *p = NULL;
4087 loff_t l = 0;
4088 int cpu;
4089
4090 /*
4091 * copy the tracer to avoid using a global lock all around.
4092 * iter->trace is a copy of current_trace, the pointer to the
4093 * name may be used instead of a strcmp(), as iter->trace->name
4094 * will point to the same string as current_trace->name.
4095 */
4096 mutex_lock(&trace_types_lock);
4097 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
4098 /* Close iter->trace before switching to the new current tracer */
4099 if (iter->trace->close)
4100 iter->trace->close(iter);
4101 *iter->trace = *tr->current_trace;
4102 /* Reopen the new current tracer */
4103 if (iter->trace->open)
4104 iter->trace->open(iter);
4105 }
4106 mutex_unlock(&trace_types_lock);
4107
4108 #ifdef CONFIG_TRACER_MAX_TRACE
4109 if (iter->snapshot && iter->trace->use_max_tr)
4110 return ERR_PTR(-EBUSY);
4111 #endif
4112
4113 if (*pos != iter->pos) {
4114 iter->ent = NULL;
4115 iter->cpu = 0;
4116 iter->idx = -1;
4117
4118 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4119 for_each_tracing_cpu(cpu)
4120 tracing_iter_reset(iter, cpu);
4121 } else
4122 tracing_iter_reset(iter, cpu_file);
4123
4124 iter->leftover = 0;
4125 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4126 ;
4127
4128 } else {
4129 /*
4130 * If we overflowed the seq_file before, then we want
4131 * to just reuse the trace_seq buffer again.
4132 */
4133 if (iter->leftover)
4134 p = iter;
4135 else {
4136 l = *pos - 1;
4137 p = s_next(m, p, &l);
4138 }
4139 }
4140
4141 trace_event_read_lock();
4142 trace_access_lock(cpu_file);
4143 return p;
4144 }
4145
4146 static void s_stop(struct seq_file *m, void *p)
4147 {
4148 struct trace_iterator *iter = m->private;
4149
4150 #ifdef CONFIG_TRACER_MAX_TRACE
4151 if (iter->snapshot && iter->trace->use_max_tr)
4152 return;
4153 #endif
4154
4155 trace_access_unlock(iter->cpu_file);
4156 trace_event_read_unlock();
4157 }
4158
4159 static void
4160 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4161 unsigned long *entries, int cpu)
4162 {
4163 unsigned long count;
4164
4165 count = ring_buffer_entries_cpu(buf->buffer, cpu);
4166 /*
4167 * If this buffer has skipped entries, then we hold all
4168 * entries for the trace and we need to ignore the
4169 * ones before the time stamp.
4170 */
4171 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4172 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4173 /* total is the same as the entries */
4174 *total = count;
4175 } else
4176 *total = count +
4177 ring_buffer_overrun_cpu(buf->buffer, cpu);
4178 *entries = count;
4179 }
4180
4181 static void
4182 get_total_entries(struct array_buffer *buf,
4183 unsigned long *total, unsigned long *entries)
4184 {
4185 unsigned long t, e;
4186 int cpu;
4187
4188 *total = 0;
4189 *entries = 0;
4190
4191 for_each_tracing_cpu(cpu) {
4192 get_total_entries_cpu(buf, &t, &e, cpu);
4193 *total += t;
4194 *entries += e;
4195 }
4196 }
4197
4198 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4199 {
4200 unsigned long total, entries;
4201
4202 if (!tr)
4203 tr = &global_trace;
4204
4205 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4206
4207 return entries;
4208 }
4209
4210 unsigned long trace_total_entries(struct trace_array *tr)
4211 {
4212 unsigned long total, entries;
4213
4214 if (!tr)
4215 tr = &global_trace;
4216
4217 get_total_entries(&tr->array_buffer, &total, &entries);
4218
4219 return entries;
4220 }
4221
4222 static void print_lat_help_header(struct seq_file *m)
4223 {
4224 seq_puts(m, "# _------=> CPU# \n"
4225 "# / _-----=> irqs-off \n"
4226 "# | / _----=> need-resched \n"
4227 "# || / _---=> hardirq/softirq \n"
4228 "# ||| / _--=> preempt-depth \n"
4229 "# |||| / _-=> migrate-disable \n"
4230 "# ||||| / delay \n"
4231 "# cmd pid |||||| time | caller \n"
4232 "# \\ / |||||| \\ | / \n");
4233 }
4234
4235 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4236 {
4237 unsigned long total;
4238 unsigned long entries;
4239
4240 get_total_entries(buf, &total, &entries);
4241 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
4242 entries, total, num_online_cpus());
4243 seq_puts(m, "#\n");
4244 }
4245
4246 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4247 unsigned int flags)
4248 {
4249 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4250
4251 print_event_info(buf, m);
4252
4253 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
4254 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
4255 }
4256
4257 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4258 unsigned int flags)
4259 {
4260 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4261 const char *space = " ";
4262 int prec = tgid ? 12 : 2;
4263
4264 print_event_info(buf, m);
4265
4266 seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
4267 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
4268 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
4269 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
4270 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
4271 seq_printf(m, "# %.*s|||| / delay\n", prec, space);
4272 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
4273 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
4274 }
4275
4276 void
4277 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4278 {
4279 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4280 struct array_buffer *buf = iter->array_buffer;
4281 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4282 struct tracer *type = iter->trace;
4283 unsigned long entries;
4284 unsigned long total;
4285 const char *name = "preemption";
4286
4287 name = type->name;
4288
4289 get_total_entries(buf, &total, &entries);
4290
4291 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4292 name, UTS_RELEASE);
4293 seq_puts(m, "# -----------------------------------"
4294 "---------------------------------\n");
4295 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4296 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4297 nsecs_to_usecs(data->saved_latency),
4298 entries,
4299 total,
4300 buf->cpu,
4301 #if defined(CONFIG_PREEMPT_NONE)
4302 "server",
4303 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4304 "desktop",
4305 #elif defined(CONFIG_PREEMPT)
4306 "preempt",
4307 #elif defined(CONFIG_PREEMPT_RT)
4308 "preempt_rt",
4309 #else
4310 "unknown",
4311 #endif
4312 /* These are reserved for later use */
4313 0, 0, 0, 0);
4314 #ifdef CONFIG_SMP
4315 seq_printf(m, " #P:%d)\n", num_online_cpus());
4316 #else
4317 seq_puts(m, ")\n");
4318 #endif
4319 seq_puts(m, "# -----------------\n");
4320 seq_printf(m, "# | task: %.16s-%d "
4321 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4322 data->comm, data->pid,
4323 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4324 data->policy, data->rt_priority);
4325 seq_puts(m, "# -----------------\n");
4326
4327 if (data->critical_start) {
4328 seq_puts(m, "# => started at: ");
4329 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4330 trace_print_seq(m, &iter->seq);
4331 seq_puts(m, "\n# => ended at: ");
4332 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4333 trace_print_seq(m, &iter->seq);
4334 seq_puts(m, "\n#\n");
4335 }
4336
4337 seq_puts(m, "#\n");
4338 }
4339
4340 static void test_cpu_buff_start(struct trace_iterator *iter)
4341 {
4342 struct trace_seq *s = &iter->seq;
4343 struct trace_array *tr = iter->tr;
4344
4345 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4346 return;
4347
4348 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4349 return;
4350
4351 if (cpumask_available(iter->started) &&
4352 cpumask_test_cpu(iter->cpu, iter->started))
4353 return;
4354
4355 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4356 return;
4357
4358 if (cpumask_available(iter->started))
4359 cpumask_set_cpu(iter->cpu, iter->started);
4360
4361 /* Don't print started cpu buffer for the first entry of the trace */
4362 if (iter->idx > 1)
4363 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4364 iter->cpu);
4365 }
4366
4367 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4368 {
4369 struct trace_array *tr = iter->tr;
4370 struct trace_seq *s = &iter->seq;
4371 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4372 struct trace_entry *entry;
4373 struct trace_event *event;
4374
4375 entry = iter->ent;
4376
4377 test_cpu_buff_start(iter);
4378
4379 event = ftrace_find_event(entry->type);
4380
4381 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4382 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4383 trace_print_lat_context(iter);
4384 else
4385 trace_print_context(iter);
4386 }
4387
4388 if (trace_seq_has_overflowed(s))
4389 return TRACE_TYPE_PARTIAL_LINE;
4390
4391 if (event)
4392 return event->funcs->trace(iter, sym_flags, event);
4393
4394 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4395
4396 return trace_handle_return(s);
4397 }
4398
4399 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4400 {
4401 struct trace_array *tr = iter->tr;
4402 struct trace_seq *s = &iter->seq;
4403 struct trace_entry *entry;
4404 struct trace_event *event;
4405
4406 entry = iter->ent;
4407
4408 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4409 trace_seq_printf(s, "%d %d %llu ",
4410 entry->pid, iter->cpu, iter->ts);
4411
4412 if (trace_seq_has_overflowed(s))
4413 return TRACE_TYPE_PARTIAL_LINE;
4414
4415 event = ftrace_find_event(entry->type);
4416 if (event)
4417 return event->funcs->raw(iter, 0, event);
4418
4419 trace_seq_printf(s, "%d ?\n", entry->type);
4420
4421 return trace_handle_return(s);
4422 }
4423
4424 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4425 {
4426 struct trace_array *tr = iter->tr;
4427 struct trace_seq *s = &iter->seq;
4428 unsigned char newline = '\n';
4429 struct trace_entry *entry;
4430 struct trace_event *event;
4431
4432 entry = iter->ent;
4433
4434 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4435 SEQ_PUT_HEX_FIELD(s, entry->pid);
4436 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4437 SEQ_PUT_HEX_FIELD(s, iter->ts);
4438 if (trace_seq_has_overflowed(s))
4439 return TRACE_TYPE_PARTIAL_LINE;
4440 }
4441
4442 event = ftrace_find_event(entry->type);
4443 if (event) {
4444 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4445 if (ret != TRACE_TYPE_HANDLED)
4446 return ret;
4447 }
4448
4449 SEQ_PUT_FIELD(s, newline);
4450
4451 return trace_handle_return(s);
4452 }
4453
4454 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4455 {
4456 struct trace_array *tr = iter->tr;
4457 struct trace_seq *s = &iter->seq;
4458 struct trace_entry *entry;
4459 struct trace_event *event;
4460
4461 entry = iter->ent;
4462
4463 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4464 SEQ_PUT_FIELD(s, entry->pid);
4465 SEQ_PUT_FIELD(s, iter->cpu);
4466 SEQ_PUT_FIELD(s, iter->ts);
4467 if (trace_seq_has_overflowed(s))
4468 return TRACE_TYPE_PARTIAL_LINE;
4469 }
4470
4471 event = ftrace_find_event(entry->type);
4472 return event ? event->funcs->binary(iter, 0, event) :
4473 TRACE_TYPE_HANDLED;
4474 }
4475
4476 int trace_empty(struct trace_iterator *iter)
4477 {
4478 struct ring_buffer_iter *buf_iter;
4479 int cpu;
4480
4481 /* If we are looking at one CPU buffer, only check that one */
4482 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4483 cpu = iter->cpu_file;
4484 buf_iter = trace_buffer_iter(iter, cpu);
4485 if (buf_iter) {
4486 if (!ring_buffer_iter_empty(buf_iter))
4487 return 0;
4488 } else {
4489 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4490 return 0;
4491 }
4492 return 1;
4493 }
4494
4495 for_each_tracing_cpu(cpu) {
4496 buf_iter = trace_buffer_iter(iter, cpu);
4497 if (buf_iter) {
4498 if (!ring_buffer_iter_empty(buf_iter))
4499 return 0;
4500 } else {
4501 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4502 return 0;
4503 }
4504 }
4505
4506 return 1;
4507 }
4508
4509 /* Called with trace_event_read_lock() held. */
4510 enum print_line_t print_trace_line(struct trace_iterator *iter)
4511 {
4512 struct trace_array *tr = iter->tr;
4513 unsigned long trace_flags = tr->trace_flags;
4514 enum print_line_t ret;
4515
4516 if (iter->lost_events) {
4517 if (iter->lost_events == (unsigned long)-1)
4518 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4519 iter->cpu);
4520 else
4521 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4522 iter->cpu, iter->lost_events);
4523 if (trace_seq_has_overflowed(&iter->seq))
4524 return TRACE_TYPE_PARTIAL_LINE;
4525 }
4526
4527 if (iter->trace && iter->trace->print_line) {
4528 ret = iter->trace->print_line(iter);
4529 if (ret != TRACE_TYPE_UNHANDLED)
4530 return ret;
4531 }
4532
4533 if (iter->ent->type == TRACE_BPUTS &&
4534 trace_flags & TRACE_ITER_PRINTK &&
4535 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4536 return trace_print_bputs_msg_only(iter);
4537
4538 if (iter->ent->type == TRACE_BPRINT &&
4539 trace_flags & TRACE_ITER_PRINTK &&
4540 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4541 return trace_print_bprintk_msg_only(iter);
4542
4543 if (iter->ent->type == TRACE_PRINT &&
4544 trace_flags & TRACE_ITER_PRINTK &&
4545 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4546 return trace_print_printk_msg_only(iter);
4547
4548 if (trace_flags & TRACE_ITER_BIN)
4549 return print_bin_fmt(iter);
4550
4551 if (trace_flags & TRACE_ITER_HEX)
4552 return print_hex_fmt(iter);
4553
4554 if (trace_flags & TRACE_ITER_RAW)
4555 return print_raw_fmt(iter);
4556
4557 return print_trace_fmt(iter);
4558 }
4559
4560 void trace_latency_header(struct seq_file *m)
4561 {
4562 struct trace_iterator *iter = m->private;
4563 struct trace_array *tr = iter->tr;
4564
4565 /* print nothing if the buffers are empty */
4566 if (trace_empty(iter))
4567 return;
4568
4569 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4570 print_trace_header(m, iter);
4571
4572 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4573 print_lat_help_header(m);
4574 }
4575
4576 void trace_default_header(struct seq_file *m)
4577 {
4578 struct trace_iterator *iter = m->private;
4579 struct trace_array *tr = iter->tr;
4580 unsigned long trace_flags = tr->trace_flags;
4581
4582 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4583 return;
4584
4585 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4586 /* print nothing if the buffers are empty */
4587 if (trace_empty(iter))
4588 return;
4589 print_trace_header(m, iter);
4590 if (!(trace_flags & TRACE_ITER_VERBOSE))
4591 print_lat_help_header(m);
4592 } else {
4593 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4594 if (trace_flags & TRACE_ITER_IRQ_INFO)
4595 print_func_help_header_irq(iter->array_buffer,
4596 m, trace_flags);
4597 else
4598 print_func_help_header(iter->array_buffer, m,
4599 trace_flags);
4600 }
4601 }
4602 }
4603
4604 static void test_ftrace_alive(struct seq_file *m)
4605 {
4606 if (!ftrace_is_dead())
4607 return;
4608 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4609 "# MAY BE MISSING FUNCTION EVENTS\n");
4610 }
4611
4612 #ifdef CONFIG_TRACER_MAX_TRACE
4613 static void show_snapshot_main_help(struct seq_file *m)
4614 {
4615 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4616 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4617 "# Takes a snapshot of the main buffer.\n"
4618 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4619 "# (Doesn't have to be '2' works with any number that\n"
4620 "# is not a '0' or '1')\n");
4621 }
4622
4623 static void show_snapshot_percpu_help(struct seq_file *m)
4624 {
4625 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4626 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4627 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4628 "# Takes a snapshot of the main buffer for this cpu.\n");
4629 #else
4630 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4631 "# Must use main snapshot file to allocate.\n");
4632 #endif
4633 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4634 "# (Doesn't have to be '2' works with any number that\n"
4635 "# is not a '0' or '1')\n");
4636 }
4637
4638 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4639 {
4640 if (iter->tr->allocated_snapshot)
4641 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4642 else
4643 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4644
4645 seq_puts(m, "# Snapshot commands:\n");
4646 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4647 show_snapshot_main_help(m);
4648 else
4649 show_snapshot_percpu_help(m);
4650 }
4651 #else
4652 /* Should never be called */
4653 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4654 #endif
4655
4656 static int s_show(struct seq_file *m, void *v)
4657 {
4658 struct trace_iterator *iter = v;
4659 int ret;
4660
4661 if (iter->ent == NULL) {
4662 if (iter->tr) {
4663 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4664 seq_puts(m, "#\n");
4665 test_ftrace_alive(m);
4666 }
4667 if (iter->snapshot && trace_empty(iter))
4668 print_snapshot_help(m, iter);
4669 else if (iter->trace && iter->trace->print_header)
4670 iter->trace->print_header(m);
4671 else
4672 trace_default_header(m);
4673
4674 } else if (iter->leftover) {
4675 /*
4676 * If we filled the seq_file buffer earlier, we
4677 * want to just show it now.
4678 */
4679 ret = trace_print_seq(m, &iter->seq);
4680
4681 /* ret should this time be zero, but you never know */
4682 iter->leftover = ret;
4683
4684 } else {
4685 ret = print_trace_line(iter);
4686 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4687 iter->seq.full = 0;
4688 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4689 }
4690 ret = trace_print_seq(m, &iter->seq);
4691 /*
4692 * If we overflow the seq_file buffer, then it will
4693 * ask us for this data again at start up.
4694 * Use that instead.
4695 * ret is 0 if seq_file write succeeded.
4696 * -1 otherwise.
4697 */
4698 iter->leftover = ret;
4699 }
4700
4701 return 0;
4702 }
4703
4704 /*
4705 * Should be used after trace_array_get(), trace_types_lock
4706 * ensures that i_cdev was already initialized.
4707 */
4708 static inline int tracing_get_cpu(struct inode *inode)
4709 {
4710 if (inode->i_cdev) /* See trace_create_cpu_file() */
4711 return (long)inode->i_cdev - 1;
4712 return RING_BUFFER_ALL_CPUS;
4713 }
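
/*
 * Example of the encoding read above (illustrative): trace_create_cpu_file()
 * stores "cpu + 1" in i_cdev, so an i_cdev of (void *)3 decodes to the
 * per_cpu/cpu2 file, while a NULL i_cdev (the top level trace files)
 * decodes to RING_BUFFER_ALL_CPUS.
 */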
4714
4715 static const struct seq_operations tracer_seq_ops = {
4716 .start = s_start,
4717 .next = s_next,
4718 .stop = s_stop,
4719 .show = s_show,
4720 };
4721
4722 static struct trace_iterator *
4723 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4724 {
4725 struct trace_array *tr = inode->i_private;
4726 struct trace_iterator *iter;
4727 int cpu;
4728
4729 if (tracing_disabled)
4730 return ERR_PTR(-ENODEV);
4731
4732 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4733 if (!iter)
4734 return ERR_PTR(-ENOMEM);
4735
4736 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4737 GFP_KERNEL);
4738 if (!iter->buffer_iter)
4739 goto release;
4740
4741 /*
4742 * trace_find_next_entry() may need to save off iter->ent.
4743 * It will place it into the iter->temp buffer. As most
4744 * events are less than 128 bytes, allocate a buffer of that size.
4745 * If one is greater, then trace_find_next_entry() will
4746 * allocate a new buffer to adjust for the bigger iter->ent.
4747 * It's not critical if it fails to get allocated here.
4748 */
4749 iter->temp = kmalloc(128, GFP_KERNEL);
4750 if (iter->temp)
4751 iter->temp_size = 128;
4752
4753 /*
4754 * trace_event_printf() may need to modify the given format
4755 * string to replace %p with %px so that it shows the real address
4756 * instead of a hash value. However, that is only needed for
4757 * event tracing; other tracers may not need it. Defer the
4758 * allocation until it is needed.
4759 */
4760 iter->fmt = NULL;
4761 iter->fmt_size = 0;
4762
4763 /*
4764 * We make a copy of the current tracer to avoid concurrent
4765 * changes on it while we are reading.
4766 */
4767 mutex_lock(&trace_types_lock);
4768 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4769 if (!iter->trace)
4770 goto fail;
4771
4772 *iter->trace = *tr->current_trace;
4773
4774 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4775 goto fail;
4776
4777 iter->tr = tr;
4778
4779 #ifdef CONFIG_TRACER_MAX_TRACE
4780 /* Currently only the top directory has a snapshot */
4781 if (tr->current_trace->print_max || snapshot)
4782 iter->array_buffer = &tr->max_buffer;
4783 else
4784 #endif
4785 iter->array_buffer = &tr->array_buffer;
4786 iter->snapshot = snapshot;
4787 iter->pos = -1;
4788 iter->cpu_file = tracing_get_cpu(inode);
4789 mutex_init(&iter->mutex);
4790
4791 /* Notify the tracer early; before we stop tracing. */
4792 if (iter->trace->open)
4793 iter->trace->open(iter);
4794
4795 /* Annotate start of buffers if we had overruns */
4796 if (ring_buffer_overruns(iter->array_buffer->buffer))
4797 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4798
4799 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4800 if (trace_clocks[tr->clock_id].in_ns)
4801 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4802
4803 /*
4804 * If pause-on-trace is enabled, then stop the trace while
4805 * dumping, unless this is the "snapshot" file
4806 */
4807 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4808 tracing_stop_tr(tr);
4809
4810 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4811 for_each_tracing_cpu(cpu) {
4812 iter->buffer_iter[cpu] =
4813 ring_buffer_read_prepare(iter->array_buffer->buffer,
4814 cpu, GFP_KERNEL);
4815 }
4816 ring_buffer_read_prepare_sync();
4817 for_each_tracing_cpu(cpu) {
4818 ring_buffer_read_start(iter->buffer_iter[cpu]);
4819 tracing_iter_reset(iter, cpu);
4820 }
4821 } else {
4822 cpu = iter->cpu_file;
4823 iter->buffer_iter[cpu] =
4824 ring_buffer_read_prepare(iter->array_buffer->buffer,
4825 cpu, GFP_KERNEL);
4826 ring_buffer_read_prepare_sync();
4827 ring_buffer_read_start(iter->buffer_iter[cpu]);
4828 tracing_iter_reset(iter, cpu);
4829 }
4830
4831 mutex_unlock(&trace_types_lock);
4832
4833 return iter;
4834
4835 fail:
4836 mutex_unlock(&trace_types_lock);
4837 kfree(iter->trace);
4838 kfree(iter->temp);
4839 kfree(iter->buffer_iter);
4840 release:
4841 seq_release_private(inode, file);
4842 return ERR_PTR(-ENOMEM);
4843 }
4844
4845 int tracing_open_generic(struct inode *inode, struct file *filp)
4846 {
4847 int ret;
4848
4849 ret = tracing_check_open_get_tr(NULL);
4850 if (ret)
4851 return ret;
4852
4853 filp->private_data = inode->i_private;
4854 return 0;
4855 }
4856
4857 bool tracing_is_disabled(void)
4858 {
4859 return (tracing_disabled) ? true: false;
4860 }
4861
4862 /*
4863 * Open and update trace_array ref count.
4864 * Must have the current trace_array passed to it.
4865 */
4866 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4867 {
4868 struct trace_array *tr = inode->i_private;
4869 int ret;
4870
4871 ret = tracing_check_open_get_tr(tr);
4872 if (ret)
4873 return ret;
4874
4875 filp->private_data = inode->i_private;
4876
4877 return 0;
4878 }
4879
4880 /*
4881 * The private pointer of the inode is the trace_event_file.
4882 * Update the tr ref count associated to it.
4883 */
4884 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4885 {
4886 struct trace_event_file *file = inode->i_private;
4887 int ret;
4888
4889 ret = tracing_check_open_get_tr(file->tr);
4890 if (ret)
4891 return ret;
4892
4893 filp->private_data = inode->i_private;
4894
4895 return 0;
4896 }
4897
4898 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4899 {
4900 struct trace_event_file *file = inode->i_private;
4901
4902 trace_array_put(file->tr);
4903
4904 return 0;
4905 }
4906
4907 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4908 {
4909 tracing_release_file_tr(inode, filp);
4910 return single_release(inode, filp);
4911 }
4912
4913 static int tracing_mark_open(struct inode *inode, struct file *filp)
4914 {
4915 stream_open(inode, filp);
4916 return tracing_open_generic_tr(inode, filp);
4917 }
4918
4919 static int tracing_release(struct inode *inode, struct file *file)
4920 {
4921 struct trace_array *tr = inode->i_private;
4922 struct seq_file *m = file->private_data;
4923 struct trace_iterator *iter;
4924 int cpu;
4925
4926 if (!(file->f_mode & FMODE_READ)) {
4927 trace_array_put(tr);
4928 return 0;
4929 }
4930
4931 /* Writes do not use seq_file */
4932 iter = m->private;
4933 mutex_lock(&trace_types_lock);
4934
4935 for_each_tracing_cpu(cpu) {
4936 if (iter->buffer_iter[cpu])
4937 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4938 }
4939
4940 if (iter->trace && iter->trace->close)
4941 iter->trace->close(iter);
4942
4943 if (!iter->snapshot && tr->stop_count)
4944 /* reenable tracing if it was previously enabled */
4945 tracing_start_tr(tr);
4946
4947 __trace_array_put(tr);
4948
4949 mutex_unlock(&trace_types_lock);
4950
4951 mutex_destroy(&iter->mutex);
4952 free_cpumask_var(iter->started);
4953 kfree(iter->fmt);
4954 kfree(iter->temp);
4955 kfree(iter->trace);
4956 kfree(iter->buffer_iter);
4957 seq_release_private(inode, file);
4958
4959 return 0;
4960 }
4961
4962 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4963 {
4964 struct trace_array *tr = inode->i_private;
4965
4966 trace_array_put(tr);
4967 return 0;
4968 }
4969
4970 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4971 {
4972 struct trace_array *tr = inode->i_private;
4973
4974 trace_array_put(tr);
4975
4976 return single_release(inode, file);
4977 }
4978
4979 static int tracing_open(struct inode *inode, struct file *file)
4980 {
4981 struct trace_array *tr = inode->i_private;
4982 struct trace_iterator *iter;
4983 int ret;
4984
4985 ret = tracing_check_open_get_tr(tr);
4986 if (ret)
4987 return ret;
4988
4989 /* If this file was open for write, then erase contents */
4990 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4991 int cpu = tracing_get_cpu(inode);
4992 struct array_buffer *trace_buf = &tr->array_buffer;
4993
4994 #ifdef CONFIG_TRACER_MAX_TRACE
4995 if (tr->current_trace->print_max)
4996 trace_buf = &tr->max_buffer;
4997 #endif
4998
4999 if (cpu == RING_BUFFER_ALL_CPUS)
5000 tracing_reset_online_cpus(trace_buf);
5001 else
5002 tracing_reset_cpu(trace_buf, cpu);
5003 }
5004
5005 if (file->f_mode & FMODE_READ) {
5006 iter = __tracing_open(inode, file, false);
5007 if (IS_ERR(iter))
5008 ret = PTR_ERR(iter);
5009 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5010 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5011 }
5012
5013 if (ret < 0)
5014 trace_array_put(tr);
5015
5016 return ret;
5017 }
5018
5019 /*
5020 * Some tracers are not suitable for instance buffers.
5021 * A tracer is always available for the global array (toplevel)
5022 * or if it explicitly states that it is.
5023 */
5024 static bool
5025 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5026 {
5027 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5028 }
5029
5030 /* Find the next tracer that this trace array may use */
5031 static struct tracer *
5032 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5033 {
5034 while (t && !trace_ok_for_array(t, tr))
5035 t = t->next;
5036
5037 return t;
5038 }
5039
5040 static void *
5041 t_next(struct seq_file *m, void *v, loff_t *pos)
5042 {
5043 struct trace_array *tr = m->private;
5044 struct tracer *t = v;
5045
5046 (*pos)++;
5047
5048 if (t)
5049 t = get_tracer_for_array(tr, t->next);
5050
5051 return t;
5052 }
5053
5054 static void *t_start(struct seq_file *m, loff_t *pos)
5055 {
5056 struct trace_array *tr = m->private;
5057 struct tracer *t;
5058 loff_t l = 0;
5059
5060 mutex_lock(&trace_types_lock);
5061
5062 t = get_tracer_for_array(tr, trace_types);
5063 for (; t && l < *pos; t = t_next(m, t, &l))
5064 ;
5065
5066 return t;
5067 }
5068
5069 static void t_stop(struct seq_file *m, void *p)
5070 {
5071 mutex_unlock(&trace_types_lock);
5072 }
5073
5074 static int t_show(struct seq_file *m, void *v)
5075 {
5076 struct tracer *t = v;
5077
5078 if (!t)
5079 return 0;
5080
5081 seq_puts(m, t->name);
5082 if (t->next)
5083 seq_putc(m, ' ');
5084 else
5085 seq_putc(m, '\n');
5086
5087 return 0;
5088 }
5089
5090 static const struct seq_operations show_traces_seq_ops = {
5091 .start = t_start,
5092 .next = t_next,
5093 .stop = t_stop,
5094 .show = t_show,
5095 };
5096
5097 static int show_traces_open(struct inode *inode, struct file *file)
5098 {
5099 struct trace_array *tr = inode->i_private;
5100 struct seq_file *m;
5101 int ret;
5102
5103 ret = tracing_check_open_get_tr(tr);
5104 if (ret)
5105 return ret;
5106
5107 ret = seq_open(file, &show_traces_seq_ops);
5108 if (ret) {
5109 trace_array_put(tr);
5110 return ret;
5111 }
5112
5113 m = file->private_data;
5114 m->private = tr;
5115
5116 return 0;
5117 }
5118
5119 static int show_traces_release(struct inode *inode, struct file *file)
5120 {
5121 struct trace_array *tr = inode->i_private;
5122
5123 trace_array_put(tr);
5124 return seq_release(inode, file);
5125 }
5126
5127 static ssize_t
5128 tracing_write_stub(struct file *filp, const char __user *ubuf,
5129 size_t count, loff_t *ppos)
5130 {
5131 return count;
5132 }
5133
5134 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5135 {
5136 int ret;
5137
5138 if (file->f_mode & FMODE_READ)
5139 ret = seq_lseek(file, offset, whence);
5140 else
5141 file->f_pos = ret = 0;
5142
5143 return ret;
5144 }
5145
5146 static const struct file_operations tracing_fops = {
5147 .open = tracing_open,
5148 .read = seq_read,
5149 .read_iter = seq_read_iter,
5150 .splice_read = generic_file_splice_read,
5151 .write = tracing_write_stub,
5152 .llseek = tracing_lseek,
5153 .release = tracing_release,
5154 };
5155
5156 static const struct file_operations show_traces_fops = {
5157 .open = show_traces_open,
5158 .read = seq_read,
5159 .llseek = seq_lseek,
5160 .release = show_traces_release,
5161 };
5162
5163 static ssize_t
5164 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5165 size_t count, loff_t *ppos)
5166 {
5167 struct trace_array *tr = file_inode(filp)->i_private;
5168 char *mask_str;
5169 int len;
5170
5171 len = snprintf(NULL, 0, "%*pb\n",
5172 cpumask_pr_args(tr->tracing_cpumask)) + 1;
5173 mask_str = kmalloc(len, GFP_KERNEL);
5174 if (!mask_str)
5175 return -ENOMEM;
5176
5177 len = snprintf(mask_str, len, "%*pb\n",
5178 cpumask_pr_args(tr->tracing_cpumask));
5179 if (len >= count) {
5180 count = -EINVAL;
5181 goto out_err;
5182 }
5183 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5184
5185 out_err:
5186 kfree(mask_str);
5187
5188 return count;
5189 }
5190
5191 int tracing_set_cpumask(struct trace_array *tr,
5192 cpumask_var_t tracing_cpumask_new)
5193 {
5194 int cpu;
5195
5196 if (!tr)
5197 return -EINVAL;
5198
5199 local_irq_disable();
5200 arch_spin_lock(&tr->max_lock);
5201 for_each_tracing_cpu(cpu) {
5202 /*
5203 * Increase/decrease the disabled counter if we are
5204 * about to flip a bit in the cpumask:
5205 */
5206 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5207 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5208 atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5209 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5210 #ifdef CONFIG_TRACER_MAX_TRACE
5211 ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5212 #endif
5213 }
5214 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5215 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5216 atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5217 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5218 #ifdef CONFIG_TRACER_MAX_TRACE
5219 ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5220 #endif
5221 }
5222 }
5223 arch_spin_unlock(&tr->max_lock);
5224 local_irq_enable();
5225
5226 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5227
5228 return 0;
5229 }
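
/*
 * Worked example for tracing_set_cpumask() above (illustrative): going
 * from an old mask of 0b1100 (CPUs 2-3) to a new mask of 0b0110, the
 * loop disables recording on CPU 3 (bit cleared), enables it on CPU 1
 * (bit newly set) and leaves CPU 2 untouched. Writes to the
 * tracing_cpumask file reach this function via tracing_cpumask_write()
 * below.
 */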
5230
5231 static ssize_t
5232 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5233 size_t count, loff_t *ppos)
5234 {
5235 struct trace_array *tr = file_inode(filp)->i_private;
5236 cpumask_var_t tracing_cpumask_new;
5237 int err;
5238
5239 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5240 return -ENOMEM;
5241
5242 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5243 if (err)
5244 goto err_free;
5245
5246 err = tracing_set_cpumask(tr, tracing_cpumask_new);
5247 if (err)
5248 goto err_free;
5249
5250 free_cpumask_var(tracing_cpumask_new);
5251
5252 return count;
5253
5254 err_free:
5255 free_cpumask_var(tracing_cpumask_new);
5256
5257 return err;
5258 }
5259
5260 static const struct file_operations tracing_cpumask_fops = {
5261 .open = tracing_open_generic_tr,
5262 .read = tracing_cpumask_read,
5263 .write = tracing_cpumask_write,
5264 .release = tracing_release_generic_tr,
5265 .llseek = generic_file_llseek,
5266 };
5267
5268 static int tracing_trace_options_show(struct seq_file *m, void *v)
5269 {
5270 struct tracer_opt *trace_opts;
5271 struct trace_array *tr = m->private;
5272 u32 tracer_flags;
5273 int i;
5274
5275 mutex_lock(&trace_types_lock);
5276 tracer_flags = tr->current_trace->flags->val;
5277 trace_opts = tr->current_trace->flags->opts;
5278
5279 for (i = 0; trace_options[i]; i++) {
5280 if (tr->trace_flags & (1 << i))
5281 seq_printf(m, "%s\n", trace_options[i]);
5282 else
5283 seq_printf(m, "no%s\n", trace_options[i]);
5284 }
5285
5286 for (i = 0; trace_opts[i].name; i++) {
5287 if (tracer_flags & trace_opts[i].bit)
5288 seq_printf(m, "%s\n", trace_opts[i].name);
5289 else
5290 seq_printf(m, "no%s\n", trace_opts[i].name);
5291 }
5292 mutex_unlock(&trace_types_lock);
5293
5294 return 0;
5295 }
5296
5297 static int __set_tracer_option(struct trace_array *tr,
5298 struct tracer_flags *tracer_flags,
5299 struct tracer_opt *opts, int neg)
5300 {
5301 struct tracer *trace = tracer_flags->trace;
5302 int ret;
5303
5304 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5305 if (ret)
5306 return ret;
5307
5308 if (neg)
5309 tracer_flags->val &= ~opts->bit;
5310 else
5311 tracer_flags->val |= opts->bit;
5312 return 0;
5313 }
5314
5315 /* Try to assign a tracer specific option */
5316 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5317 {
5318 struct tracer *trace = tr->current_trace;
5319 struct tracer_flags *tracer_flags = trace->flags;
5320 struct tracer_opt *opts = NULL;
5321 int i;
5322
5323 for (i = 0; tracer_flags->opts[i].name; i++) {
5324 opts = &tracer_flags->opts[i];
5325
5326 if (strcmp(cmp, opts->name) == 0)
5327 return __set_tracer_option(tr, trace->flags, opts, neg);
5328 }
5329
5330 return -EINVAL;
5331 }
5332
5333 /* Some tracers require overwrite to stay enabled */
5334 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5335 {
5336 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5337 return -1;
5338
5339 return 0;
5340 }
5341
5342 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5343 {
5344 int *map;
5345
5346 if ((mask == TRACE_ITER_RECORD_TGID) ||
5347 (mask == TRACE_ITER_RECORD_CMD))
5348 lockdep_assert_held(&event_mutex);
5349
5350 /* do nothing if flag is already set */
5351 if (!!(tr->trace_flags & mask) == !!enabled)
5352 return 0;
5353
5354 /* Give the tracer a chance to approve the change */
5355 if (tr->current_trace->flag_changed)
5356 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5357 return -EINVAL;
5358
5359 if (enabled)
5360 tr->trace_flags |= mask;
5361 else
5362 tr->trace_flags &= ~mask;
5363
5364 if (mask == TRACE_ITER_RECORD_CMD)
5365 trace_event_enable_cmd_record(enabled);
5366
5367 if (mask == TRACE_ITER_RECORD_TGID) {
5368 if (!tgid_map) {
5369 tgid_map_max = pid_max;
5370 map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5371 GFP_KERNEL);
5372
5373 /*
5374 * Pairs with smp_load_acquire() in
5375 * trace_find_tgid_ptr() to ensure that if it observes
5376 * the tgid_map we just allocated then it also observes
5377 * the corresponding tgid_map_max value.
5378 */
5379 smp_store_release(&tgid_map, map);
5380 }
5381 if (!tgid_map) {
5382 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5383 return -ENOMEM;
5384 }
5385
5386 trace_event_enable_tgid_record(enabled);
5387 }
5388
5389 if (mask == TRACE_ITER_EVENT_FORK)
5390 trace_event_follow_fork(tr, enabled);
5391
5392 if (mask == TRACE_ITER_FUNC_FORK)
5393 ftrace_pid_follow_fork(tr, enabled);
5394
5395 if (mask == TRACE_ITER_OVERWRITE) {
5396 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5397 #ifdef CONFIG_TRACER_MAX_TRACE
5398 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5399 #endif
5400 }
5401
5402 if (mask == TRACE_ITER_PRINTK) {
5403 trace_printk_start_stop_comm(enabled);
5404 trace_printk_control(enabled);
5405 }
5406
5407 return 0;
5408 }
5409
5410 int trace_set_options(struct trace_array *tr, char *option)
5411 {
5412 char *cmp;
5413 int neg = 0;
5414 int ret;
5415 size_t orig_len = strlen(option);
5416 int len;
5417
5418 cmp = strstrip(option);
5419
5420 len = str_has_prefix(cmp, "no");
5421 if (len)
5422 neg = 1;
5423
5424 cmp += len;
5425
5426 mutex_lock(&event_mutex);
5427 mutex_lock(&trace_types_lock);
5428
5429 ret = match_string(trace_options, -1, cmp);
5430 /* If no option could be set, test the specific tracer options */
5431 if (ret < 0)
5432 ret = set_tracer_option(tr, cmp, neg);
5433 else
5434 ret = set_tracer_flag(tr, 1 << ret, !neg);
5435
5436 mutex_unlock(&trace_types_lock);
5437 mutex_unlock(&event_mutex);
5438
5439 /*
5440 * If the first trailing whitespace is replaced with '\0' by strstrip,
5441 * turn it back into a space.
5442 */
5443 if (orig_len > strlen(option))
5444 option[strlen(option)] = ' ';
5445
5446 return ret;
5447 }
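
/*
 * Example use of trace_set_options() above (illustrative, assuming the
 * flag is named "overwrite" in trace_options[]): "nooverwrite" has its
 * "no" prefix stripped, matches "overwrite" and ends up in
 * set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 0), which also propagates
 * the change to the ring buffer via ring_buffer_change_overwrite().
 *
 *	trace_set_options(&global_trace, "nooverwrite");
 */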
5448
5449 static void __init apply_trace_boot_options(void)
5450 {
5451 char *buf = trace_boot_options_buf;
5452 char *option;
5453
5454 while (true) {
5455 option = strsep(&buf, ",");
5456
5457 if (!option)
5458 break;
5459
5460 if (*option)
5461 trace_set_options(&global_trace, option);
5462
5463 /* Put back the comma to allow this to be called again */
5464 if (buf)
5465 *(buf - 1) = ',';
5466 }
5467 }
5468
5469 static ssize_t
5470 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5471 size_t cnt, loff_t *ppos)
5472 {
5473 struct seq_file *m = filp->private_data;
5474 struct trace_array *tr = m->private;
5475 char buf[64];
5476 int ret;
5477
5478 if (cnt >= sizeof(buf))
5479 return -EINVAL;
5480
5481 if (copy_from_user(buf, ubuf, cnt))
5482 return -EFAULT;
5483
5484 buf[cnt] = 0;
5485
5486 ret = trace_set_options(tr, buf);
5487 if (ret < 0)
5488 return ret;
5489
5490 *ppos += cnt;
5491
5492 return cnt;
5493 }
5494
5495 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5496 {
5497 struct trace_array *tr = inode->i_private;
5498 int ret;
5499
5500 ret = tracing_check_open_get_tr(tr);
5501 if (ret)
5502 return ret;
5503
5504 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5505 if (ret < 0)
5506 trace_array_put(tr);
5507
5508 return ret;
5509 }
5510
5511 static const struct file_operations tracing_iter_fops = {
5512 .open = tracing_trace_options_open,
5513 .read = seq_read,
5514 .llseek = seq_lseek,
5515 .release = tracing_single_release_tr,
5516 .write = tracing_trace_options_write,
5517 };
5518
5519 static const char readme_msg[] =
5520 "tracing mini-HOWTO:\n\n"
5521 "# echo 0 > tracing_on : quick way to disable tracing\n"
5522 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5523 " Important files:\n"
5524 " trace\t\t\t- The static contents of the buffer\n"
5525 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5526 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5527 " current_tracer\t- function and latency tracers\n"
5528 " available_tracers\t- list of configured tracers for current_tracer\n"
5529 " error_log\t- error log for failed commands (that support it)\n"
5530 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5531 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5532 " trace_clock\t\t-change the clock used to order events\n"
5533 " local: Per cpu clock but may not be synced across CPUs\n"
5534 " global: Synced across CPUs but slows tracing down.\n"
5535 " counter: Not a clock, but just an increment\n"
5536 " uptime: Jiffy counter from time of boot\n"
5537 " perf: Same clock that perf events use\n"
5538 #ifdef CONFIG_X86_64
5539 " x86-tsc: TSC cycle counter\n"
5540 #endif
5541 "\n timestamp_mode\t-view the mode used to timestamp events\n"
5542 " delta: Delta difference against a buffer-wide timestamp\n"
5543 " absolute: Absolute (standalone) timestamp\n"
5544 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5545 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5546 " tracing_cpumask\t- Limit which CPUs to trace\n"
5547 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5548 "\t\t\t Remove sub-buffer with rmdir\n"
5549 " trace_options\t\t- Set format or modify how tracing happens\n"
5550 "\t\t\t Disable an option by prefixing 'no' to the\n"
5551 "\t\t\t option name\n"
5552 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5553 #ifdef CONFIG_DYNAMIC_FTRACE
5554 "\n available_filter_functions - list of functions that can be filtered on\n"
5555 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5556 "\t\t\t functions\n"
5557 "\t accepts: func_full_name or glob-matching-pattern\n"
5558 "\t modules: Can select a group via module\n"
5559 "\t Format: :mod:<module-name>\n"
5560 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5561 "\t triggers: a command to perform when function is hit\n"
5562 "\t Format: <function>:<trigger>[:count]\n"
5563 "\t trigger: traceon, traceoff\n"
5564 "\t\t enable_event:<system>:<event>\n"
5565 "\t\t disable_event:<system>:<event>\n"
5566 #ifdef CONFIG_STACKTRACE
5567 "\t\t stacktrace\n"
5568 #endif
5569 #ifdef CONFIG_TRACER_SNAPSHOT
5570 "\t\t snapshot\n"
5571 #endif
5572 "\t\t dump\n"
5573 "\t\t cpudump\n"
5574 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5575 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5576 "\t The first one will disable tracing every time do_fault is hit\n"
5577 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5578 "\t The first time do trap is hit and it disables tracing, the\n"
5579 "\t counter will decrement to 2. If tracing is already disabled,\n"
5580 "\t the counter will not decrement. It only decrements when the\n"
5581 "\t trigger did work\n"
5582 "\t To remove trigger without count:\n"
5583 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
5584 "\t To remove trigger with a count:\n"
5585 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5586 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5587 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5588 "\t modules: Can select a group via module command :mod:\n"
5589 "\t Does not accept triggers\n"
5590 #endif /* CONFIG_DYNAMIC_FTRACE */
5591 #ifdef CONFIG_FUNCTION_TRACER
5592 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5593 "\t\t (function)\n"
5594 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5595 "\t\t (function)\n"
5596 #endif
5597 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5598 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5599 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5600 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5601 #endif
5602 #ifdef CONFIG_TRACER_SNAPSHOT
5603 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5604 "\t\t\t snapshot buffer. Read the contents for more\n"
5605 "\t\t\t information\n"
5606 #endif
5607 #ifdef CONFIG_STACK_TRACER
5608 " stack_trace\t\t- Shows the max stack trace when active\n"
5609 " stack_max_size\t- Shows current max stack size that was traced\n"
5610 "\t\t\t Write into this file to reset the max size (trigger a\n"
5611 "\t\t\t new trace)\n"
5612 #ifdef CONFIG_DYNAMIC_FTRACE
5613 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5614 "\t\t\t traces\n"
5615 #endif
5616 #endif /* CONFIG_STACK_TRACER */
5617 #ifdef CONFIG_DYNAMIC_EVENTS
5618 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5619 "\t\t\t Write into this file to define/undefine new trace events.\n"
5620 #endif
5621 #ifdef CONFIG_KPROBE_EVENTS
5622 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5623 "\t\t\t Write into this file to define/undefine new trace events.\n"
5624 #endif
5625 #ifdef CONFIG_UPROBE_EVENTS
5626 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5627 "\t\t\t Write into this file to define/undefine new trace events.\n"
5628 #endif
5629 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5630 "\t accepts: event-definitions (one definition per line)\n"
5631 "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
5632 "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5633 #ifdef CONFIG_HIST_TRIGGERS
5634 "\t s:[synthetic/]<event> <field> [<field>]\n"
5635 #endif
5636 "\t e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5637 "\t -:[<group>/]<event>\n"
5638 #ifdef CONFIG_KPROBE_EVENTS
5639 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5640 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5641 #endif
5642 #ifdef CONFIG_UPROBE_EVENTS
5643 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5644 #endif
5645 "\t args: <name>=fetcharg[:type]\n"
5646 "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5647 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5648 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5649 #else
5650 "\t $stack<index>, $stack, $retval, $comm,\n"
5651 #endif
5652 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5653 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5654 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5655 "\t symstr, <type>\\[<array-size>\\]\n"
5656 #ifdef CONFIG_HIST_TRIGGERS
5657 "\t field: <stype> <name>;\n"
5658 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5659 "\t [unsigned] char/int/long\n"
5660 #endif
5661 "\t efield: For event probes ('e' types), the field is on of the fields\n"
5662 "\t of the <attached-group>/<attached-event>.\n"
5663 #endif
5664 " events/\t\t- Directory containing all trace event subsystems:\n"
5665 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5666 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5667 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5668 "\t\t\t events\n"
5669 " filter\t\t- If set, only events passing filter are traced\n"
5670 " events/<system>/<event>/\t- Directory containing control files for\n"
5671 "\t\t\t <event>:\n"
5672 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5673 " filter\t\t- If set, only events passing filter are traced\n"
5674 " trigger\t\t- If set, a command to perform when event is hit\n"
5675 "\t Format: <trigger>[:count][if <filter>]\n"
5676 "\t trigger: traceon, traceoff\n"
5677 "\t enable_event:<system>:<event>\n"
5678 "\t disable_event:<system>:<event>\n"
5679 #ifdef CONFIG_HIST_TRIGGERS
5680 "\t enable_hist:<system>:<event>\n"
5681 "\t disable_hist:<system>:<event>\n"
5682 #endif
5683 #ifdef CONFIG_STACKTRACE
5684 "\t\t stacktrace\n"
5685 #endif
5686 #ifdef CONFIG_TRACER_SNAPSHOT
5687 "\t\t snapshot\n"
5688 #endif
5689 #ifdef CONFIG_HIST_TRIGGERS
5690 "\t\t hist (see below)\n"
5691 #endif
5692 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5693 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5694 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5695 "\t events/block/block_unplug/trigger\n"
5696 "\t The first disables tracing every time block_unplug is hit.\n"
5697 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5698 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5699 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5700 "\t Like function triggers, the counter is only decremented if it\n"
5701 "\t enabled or disabled tracing.\n"
5702 "\t To remove a trigger without a count:\n"
5703 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5704 "\t To remove a trigger with a count:\n"
5705 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5706 "\t Filters can be ignored when removing a trigger.\n"
5707 #ifdef CONFIG_HIST_TRIGGERS
5708 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5709 "\t Format: hist:keys=<field1[,field2,...]>\n"
5710 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5711 "\t [:values=<field1[,field2,...]>]\n"
5712 "\t [:sort=<field1[,field2,...]>]\n"
5713 "\t [:size=#entries]\n"
5714 "\t [:pause][:continue][:clear]\n"
5715 "\t [:name=histname1]\n"
5716 "\t [:<handler>.<action>]\n"
5717 "\t [if <filter>]\n\n"
5718 "\t Note, special fields can be used as well:\n"
5719 "\t common_timestamp - to record current timestamp\n"
5720 "\t common_cpu - to record the CPU the event happened on\n"
5721 "\n"
5722 "\t A hist trigger variable can be:\n"
5723 "\t - a reference to a field e.g. x=current_timestamp,\n"
5724 "\t - a reference to another variable e.g. y=$x,\n"
5725 "\t - a numeric literal: e.g. ms_per_sec=1000,\n"
5726 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5727 "\n"
5728 "\t hist trigger aritmethic expressions support addition(+), subtraction(-),\n"
5729 "\t multiplication(*) and division(/) operators. An operand can be either a\n"
5730 "\t variable reference, field or numeric literal.\n"
5731 "\n"
5732 "\t When a matching event is hit, an entry is added to a hash\n"
5733 "\t table using the key(s) and value(s) named, and the value of a\n"
5734 "\t sum called 'hitcount' is incremented. Keys and values\n"
5735 "\t correspond to fields in the event's format description. Keys\n"
5736 "\t can be any field, or the special string 'stacktrace'.\n"
5737 "\t Compound keys consisting of up to two fields can be specified\n"
5738 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5739 "\t fields. Sort keys consisting of up to two fields can be\n"
5740 "\t specified using the 'sort' keyword. The sort direction can\n"
5741 "\t be modified by appending '.descending' or '.ascending' to a\n"
5742 "\t sort field. The 'size' parameter can be used to specify more\n"
5743 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5744 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5745 "\t its histogram data will be shared with other triggers of the\n"
5746 "\t same name, and trigger hits will update this common data.\n\n"
5747 "\t Reading the 'hist' file for the event will dump the hash\n"
5748 "\t table in its entirety to stdout. If there are multiple hist\n"
5749 "\t triggers attached to an event, there will be a table for each\n"
5750 "\t trigger in the output. The table displayed for a named\n"
5751 "\t trigger will be the same as any other instance having the\n"
5752 "\t same name. The default format used to display a given field\n"
5753 "\t can be modified by appending any of the following modifiers\n"
5754 "\t to the field name, as applicable:\n\n"
5755 "\t .hex display a number as a hex value\n"
5756 "\t .sym display an address as a symbol\n"
5757 "\t .sym-offset display an address as a symbol and offset\n"
5758 "\t .execname display a common_pid as a program name\n"
5759 "\t .syscall display a syscall id as a syscall name\n"
5760 "\t .log2 display log2 value rather than raw number\n"
5761 "\t .buckets=size display values in groups of size rather than raw number\n"
5762 "\t .usecs display a common_timestamp in microseconds\n\n"
5763 "\t The 'pause' parameter can be used to pause an existing hist\n"
5764 "\t trigger or to start a hist trigger but not log any events\n"
5765 "\t until told to do so. 'continue' can be used to start or\n"
5766 "\t restart a paused hist trigger.\n\n"
5767 "\t The 'clear' parameter will clear the contents of a running\n"
5768 "\t hist trigger and leave its current paused/active state\n"
5769 "\t unchanged.\n\n"
5770 "\t The enable_hist and disable_hist triggers can be used to\n"
5771 "\t have one event conditionally start and stop another event's\n"
5772 "\t already-attached hist trigger. The syntax is analogous to\n"
5773 "\t the enable_event and disable_event triggers.\n\n"
5774 "\t Hist trigger handlers and actions are executed whenever a\n"
5775 "\t a histogram entry is added or updated. They take the form:\n\n"
5776 "\t <handler>.<action>\n\n"
5777 "\t The available handlers are:\n\n"
5778 "\t onmatch(matching.event) - invoke on addition or update\n"
5779 "\t onmax(var) - invoke if var exceeds current max\n"
5780 "\t onchange(var) - invoke action if var changes\n\n"
5781 "\t The available actions are:\n\n"
5782 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5783 "\t save(field,...) - save current event fields\n"
5784 #ifdef CONFIG_TRACER_SNAPSHOT
5785 "\t snapshot() - snapshot the trace buffer\n\n"
5786 #endif
5787 #ifdef CONFIG_SYNTH_EVENTS
5788 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5789 "\t Write into this file to define/undefine new synthetic events.\n"
5790 "\t example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5791 #endif
5792 #endif
5793 ;
5794
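/*
 * The readme_msg text above is what user space reads back from the
 * tracefs "README" file (served by tracing_readme_read() below).  A
 * minimal sketch, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[4096];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/README", O_RDONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		while ((n = read(fd, buf, sizeof(buf))) > 0)
 *			fwrite(buf, 1, n, stdout);	// prints the mini-HOWTO
 *		close(fd);
 *		return 0;
 *	}
 */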
5795 static ssize_t
5796 tracing_readme_read(struct file *filp, char __user *ubuf,
5797 size_t cnt, loff_t *ppos)
5798 {
5799 return simple_read_from_buffer(ubuf, cnt, ppos,
5800 readme_msg, strlen(readme_msg));
5801 }
5802
5803 static const struct file_operations tracing_readme_fops = {
5804 .open = tracing_open_generic,
5805 .read = tracing_readme_read,
5806 .llseek = generic_file_llseek,
5807 };
5808
5809 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5810 {
5811 int pid = ++(*pos);
5812
5813 return trace_find_tgid_ptr(pid);
5814 }
5815
5816 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5817 {
5818 int pid = *pos;
5819
5820 return trace_find_tgid_ptr(pid);
5821 }
5822
5823 static void saved_tgids_stop(struct seq_file *m, void *v)
5824 {
5825 }
5826
5827 static int saved_tgids_show(struct seq_file *m, void *v)
5828 {
5829 int *entry = (int *)v;
5830 int pid = entry - tgid_map;
5831 int tgid = *entry;
5832
5833 if (tgid == 0)
5834 return SEQ_SKIP;
5835
5836 seq_printf(m, "%d %d\n", pid, tgid);
5837 return 0;
5838 }
5839
5840 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5841 .start = saved_tgids_start,
5842 .stop = saved_tgids_stop,
5843 .next = saved_tgids_next,
5844 .show = saved_tgids_show,
5845 };
5846
5847 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5848 {
5849 int ret;
5850
5851 ret = tracing_check_open_get_tr(NULL);
5852 if (ret)
5853 return ret;
5854
5855 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5856 }
5857
5858
5859 static const struct file_operations tracing_saved_tgids_fops = {
5860 .open = tracing_saved_tgids_open,
5861 .read = seq_read,
5862 .llseek = seq_lseek,
5863 .release = seq_release,
5864 };
5865
5866 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5867 {
5868 unsigned int *ptr = v;
5869
5870 if (*pos || m->count)
5871 ptr++;
5872
5873 (*pos)++;
5874
5875 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5876 ptr++) {
5877 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5878 continue;
5879
5880 return ptr;
5881 }
5882
5883 return NULL;
5884 }
5885
5886 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5887 {
5888 void *v;
5889 loff_t l = 0;
5890
5891 preempt_disable();
5892 arch_spin_lock(&trace_cmdline_lock);
5893
5894 v = &savedcmd->map_cmdline_to_pid[0];
5895 while (l <= *pos) {
5896 v = saved_cmdlines_next(m, v, &l);
5897 if (!v)
5898 return NULL;
5899 }
5900
5901 return v;
5902 }
5903
5904 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5905 {
5906 arch_spin_unlock(&trace_cmdline_lock);
5907 preempt_enable();
5908 }
5909
5910 static int saved_cmdlines_show(struct seq_file *m, void *v)
5911 {
5912 char buf[TASK_COMM_LEN];
5913 unsigned int *pid = v;
5914
5915 __trace_find_cmdline(*pid, buf);
5916 seq_printf(m, "%d %s\n", *pid, buf);
5917 return 0;
5918 }
5919
5920 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5921 .start = saved_cmdlines_start,
5922 .next = saved_cmdlines_next,
5923 .stop = saved_cmdlines_stop,
5924 .show = saved_cmdlines_show,
5925 };
5926
5927 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5928 {
5929 int ret;
5930
5931 ret = tracing_check_open_get_tr(NULL);
5932 if (ret)
5933 return ret;
5934
5935 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5936 }
5937
5938 static const struct file_operations tracing_saved_cmdlines_fops = {
5939 .open = tracing_saved_cmdlines_open,
5940 .read = seq_read,
5941 .llseek = seq_lseek,
5942 .release = seq_release,
5943 };
5944
5945 static ssize_t
5946 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5947 size_t cnt, loff_t *ppos)
5948 {
5949 char buf[64];
5950 int r;
5951
5952 preempt_disable();
5953 arch_spin_lock(&trace_cmdline_lock);
5954 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5955 arch_spin_unlock(&trace_cmdline_lock);
5956 preempt_enable();
5957
5958 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5959 }
5960
5961 static int tracing_resize_saved_cmdlines(unsigned int val)
5962 {
5963 struct saved_cmdlines_buffer *s, *savedcmd_temp;
5964
5965 s = allocate_cmdlines_buffer(val);
5966 if (!s)
5967 return -ENOMEM;
5968
5969 preempt_disable();
5970 arch_spin_lock(&trace_cmdline_lock);
5971 savedcmd_temp = savedcmd;
5972 savedcmd = s;
5973 arch_spin_unlock(&trace_cmdline_lock);
5974 preempt_enable();
5975 free_saved_cmdlines_buffer(savedcmd_temp);
5976
5977 return 0;
5978 }
5979
5980 static ssize_t
5981 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5982 size_t cnt, loff_t *ppos)
5983 {
5984 unsigned long val;
5985 int ret;
5986
5987 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5988 if (ret)
5989 return ret;
5990
5991 /* must have at least 1 entry and at most PID_MAX_DEFAULT */
5992 if (!val || val > PID_MAX_DEFAULT)
5993 return -EINVAL;
5994
5995 ret = tracing_resize_saved_cmdlines((unsigned int)val);
5996 if (ret < 0)
5997 return ret;
5998
5999 *ppos += cnt;
6000
6001 return cnt;
6002 }
6003
6004 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6005 .open = tracing_open_generic,
6006 .read = tracing_saved_cmdlines_size_read,
6007 .write = tracing_saved_cmdlines_size_write,
6008 };
6009
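/*
 * Usage sketch for the files above, assuming tracefs is mounted at
 * /sys/kernel/tracing: "saved_cmdlines" dumps the cached pid/comm
 * pairs and "saved_cmdlines_size" resizes that cache.  The value is a
 * number of entries; tracing_saved_cmdlines_size_write() rejects 0 and
 * anything above PID_MAX_DEFAULT.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int resize_saved_cmdlines(const char *entries)	// e.g. "1024"
 *	{
 *		int fd = open("/sys/kernel/tracing/saved_cmdlines_size", O_WRONLY);
 *		ssize_t ret;
 *
 *		if (fd < 0)
 *			return -1;
 *		ret = write(fd, entries, strlen(entries));
 *		close(fd);
 *		return ret < 0 ? -1 : 0;
 *	}
 */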
6010 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6011 static union trace_eval_map_item *
6012 update_eval_map(union trace_eval_map_item *ptr)
6013 {
6014 if (!ptr->map.eval_string) {
6015 if (ptr->tail.next) {
6016 ptr = ptr->tail.next;
6017 /* Set ptr to the next real item (skip head) */
6018 ptr++;
6019 } else
6020 return NULL;
6021 }
6022 return ptr;
6023 }
6024
6025 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6026 {
6027 union trace_eval_map_item *ptr = v;
6028
6029 /*
6030 * Paranoid! If ptr points to end, we don't want to increment past it.
6031 * This really should never happen.
6032 */
6033 (*pos)++;
6034 ptr = update_eval_map(ptr);
6035 if (WARN_ON_ONCE(!ptr))
6036 return NULL;
6037
6038 ptr++;
6039 ptr = update_eval_map(ptr);
6040
6041 return ptr;
6042 }
6043
6044 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6045 {
6046 union trace_eval_map_item *v;
6047 loff_t l = 0;
6048
6049 mutex_lock(&trace_eval_mutex);
6050
6051 v = trace_eval_maps;
6052 if (v)
6053 v++;
6054
6055 while (v && l < *pos) {
6056 v = eval_map_next(m, v, &l);
6057 }
6058
6059 return v;
6060 }
6061
6062 static void eval_map_stop(struct seq_file *m, void *v)
6063 {
6064 mutex_unlock(&trace_eval_mutex);
6065 }
6066
6067 static int eval_map_show(struct seq_file *m, void *v)
6068 {
6069 union trace_eval_map_item *ptr = v;
6070
6071 seq_printf(m, "%s %ld (%s)\n",
6072 ptr->map.eval_string, ptr->map.eval_value,
6073 ptr->map.system);
6074
6075 return 0;
6076 }
6077
6078 static const struct seq_operations tracing_eval_map_seq_ops = {
6079 .start = eval_map_start,
6080 .next = eval_map_next,
6081 .stop = eval_map_stop,
6082 .show = eval_map_show,
6083 };
6084
6085 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6086 {
6087 int ret;
6088
6089 ret = tracing_check_open_get_tr(NULL);
6090 if (ret)
6091 return ret;
6092
6093 return seq_open(filp, &tracing_eval_map_seq_ops);
6094 }
6095
6096 static const struct file_operations tracing_eval_map_fops = {
6097 .open = tracing_eval_map_open,
6098 .read = seq_read,
6099 .llseek = seq_lseek,
6100 .release = seq_release,
6101 };
6102
6103 static inline union trace_eval_map_item *
6104 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6105 {
6106 /* Return tail of array given the head */
6107 return ptr + ptr->head.length + 1;
6108 }
6109
6110 static void
6111 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6112 int len)
6113 {
6114 struct trace_eval_map **stop;
6115 struct trace_eval_map **map;
6116 union trace_eval_map_item *map_array;
6117 union trace_eval_map_item *ptr;
6118
6119 stop = start + len;
6120
6121 /*
6122 * The trace_eval_maps contains the map plus a head and tail item,
6123 * where the head holds the module and length of array, and the
6124 * tail holds a pointer to the next list.
6125 */
6126 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6127 if (!map_array) {
6128 pr_warn("Unable to allocate trace eval mapping\n");
6129 return;
6130 }
6131
6132 mutex_lock(&trace_eval_mutex);
6133
6134 if (!trace_eval_maps)
6135 trace_eval_maps = map_array;
6136 else {
6137 ptr = trace_eval_maps;
6138 for (;;) {
6139 ptr = trace_eval_jmp_to_tail(ptr);
6140 if (!ptr->tail.next)
6141 break;
6142 ptr = ptr->tail.next;
6143
6144 }
6145 ptr->tail.next = map_array;
6146 }
6147 map_array->head.mod = mod;
6148 map_array->head.length = len;
6149 map_array++;
6150
6151 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6152 map_array->map = **map;
6153 map_array++;
6154 }
6155 memset(map_array, 0, sizeof(*map_array));
6156
6157 mutex_unlock(&trace_eval_mutex);
6158 }
6159
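/*
 * Layout sketch of the map_array allocated above (len + 2 items):
 *
 *	map_array[0].head	= { .mod = mod, .length = len }
 *	map_array[1..len].map	= copies of *start[0] ... *start[len - 1]
 *	map_array[len + 1].tail	= { 0 }	// zeroed by the memset(); its
 *					// ->tail.next later links the
 *					// next module's array
 *
 * This is why trace_eval_jmp_to_tail() above returns
 * ptr + ptr->head.length + 1 to step from a head item to its tail.
 */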
6160 static void trace_create_eval_file(struct dentry *d_tracer)
6161 {
6162 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6163 NULL, &tracing_eval_map_fops);
6164 }
6165
6166 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6167 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6168 static inline void trace_insert_eval_map_file(struct module *mod,
6169 struct trace_eval_map **start, int len) { }
6170 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6171
6172 static void trace_insert_eval_map(struct module *mod,
6173 struct trace_eval_map **start, int len)
6174 {
6175 struct trace_eval_map **map;
6176
6177 if (len <= 0)
6178 return;
6179
6180 map = start;
6181
6182 trace_event_eval_update(map, len);
6183
6184 trace_insert_eval_map_file(mod, start, len);
6185 }
6186
6187 static ssize_t
6188 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6189 size_t cnt, loff_t *ppos)
6190 {
6191 struct trace_array *tr = filp->private_data;
6192 char buf[MAX_TRACER_SIZE+2];
6193 int r;
6194
6195 mutex_lock(&trace_types_lock);
6196 r = sprintf(buf, "%s\n", tr->current_trace->name);
6197 mutex_unlock(&trace_types_lock);
6198
6199 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6200 }
6201
6202 int tracer_init(struct tracer *t, struct trace_array *tr)
6203 {
6204 tracing_reset_online_cpus(&tr->array_buffer);
6205 return t->init(tr);
6206 }
6207
6208 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6209 {
6210 int cpu;
6211
6212 for_each_tracing_cpu(cpu)
6213 per_cpu_ptr(buf->data, cpu)->entries = val;
6214 }
6215
6216 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6217 {
6218 if (cpu == RING_BUFFER_ALL_CPUS) {
6219 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6220 } else {
6221 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6222 }
6223 }
6224
6225 #ifdef CONFIG_TRACER_MAX_TRACE
6226 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6227 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6228 struct array_buffer *size_buf, int cpu_id)
6229 {
6230 int cpu, ret = 0;
6231
6232 if (cpu_id == RING_BUFFER_ALL_CPUS) {
6233 for_each_tracing_cpu(cpu) {
6234 ret = ring_buffer_resize(trace_buf->buffer,
6235 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6236 if (ret < 0)
6237 break;
6238 per_cpu_ptr(trace_buf->data, cpu)->entries =
6239 per_cpu_ptr(size_buf->data, cpu)->entries;
6240 }
6241 } else {
6242 ret = ring_buffer_resize(trace_buf->buffer,
6243 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6244 if (ret == 0)
6245 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6246 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6247 }
6248
6249 return ret;
6250 }
6251 #endif /* CONFIG_TRACER_MAX_TRACE */
6252
6253 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6254 unsigned long size, int cpu)
6255 {
6256 int ret;
6257
6258 /*
6259 * If kernel or user changes the size of the ring buffer
6260 * we use the size that was given, and we can forget about
6261 * expanding it later.
6262 */
6263 ring_buffer_expanded = true;
6264
6265 /* May be called before buffers are initialized */
6266 if (!tr->array_buffer.buffer)
6267 return 0;
6268
6269 /* Do not allow tracing while resizing ring buffer */
6270 tracing_stop_tr(tr);
6271
6272 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6273 if (ret < 0)
6274 goto out_start;
6275
6276 #ifdef CONFIG_TRACER_MAX_TRACE
6277 if (!tr->allocated_snapshot)
6278 goto out;
6279
6280 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6281 if (ret < 0) {
6282 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6283 &tr->array_buffer, cpu);
6284 if (r < 0) {
6285 /*
6286 * AARGH! We are left with different
6287 * size max buffer!!!!
6288 * The max buffer is our "snapshot" buffer.
6289 * When a tracer needs a snapshot (one of the
6290 * latency tracers), it swaps the max buffer
6291 * with the saved snapshot. We succeeded in updating
6292 * the size of the main buffer, but failed to
6293 * update the size of the max buffer. But when we tried
6294 * to reset the main buffer to the original size, we
6295 * failed there too. This is very unlikely to
6296 * happen, but if it does, warn and kill all
6297 * tracing.
6298 */
6299 WARN_ON(1);
6300 tracing_disabled = 1;
6301 }
6302 goto out_start;
6303 }
6304
6305 update_buffer_entries(&tr->max_buffer, cpu);
6306
6307 out:
6308 #endif /* CONFIG_TRACER_MAX_TRACE */
6309
6310 update_buffer_entries(&tr->array_buffer, cpu);
6311 out_start:
6312 tracing_start_tr(tr);
6313 return ret;
6314 }
6315
6316 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6317 unsigned long size, int cpu_id)
6318 {
6319 int ret;
6320
6321 mutex_lock(&trace_types_lock);
6322
6323 if (cpu_id != RING_BUFFER_ALL_CPUS) {
6324 /* make sure, this cpu is enabled in the mask */
6325 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6326 ret = -EINVAL;
6327 goto out;
6328 }
6329 }
6330
6331 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6332 if (ret < 0)
6333 ret = -ENOMEM;
6334
6335 out:
6336 mutex_unlock(&trace_types_lock);
6337
6338 return ret;
6339 }
6340
6341
6342 /**
6343 * tracing_update_buffers - used by tracing facility to expand ring buffers
6344 *
6345 * To save memory when tracing is never used on a system that has it
6346 * configured in, the ring buffers are set to a minimum size. But once
6347 * a user starts to use the tracing facility, they need to grow
6348 * to their default size.
6349 *
6350 * This function is to be called when a tracer is about to be used.
6351 */
6352 int tracing_update_buffers(void)
6353 {
6354 int ret = 0;
6355
6356 mutex_lock(&trace_types_lock);
6357 if (!ring_buffer_expanded)
6358 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6359 RING_BUFFER_ALL_CPUS);
6360 mutex_unlock(&trace_types_lock);
6361
6362 return ret;
6363 }
6364
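/*
 * A user-visible sketch of the expansion, assuming tracefs is mounted
 * at /sys/kernel/tracing: while the buffers are still at their
 * boot-time minimum, tracing_entries_read() (further below) reports
 * both the current and the would-be expanded size, e.g.
 *
 *	# cat /sys/kernel/tracing/buffer_size_kb
 *	7 (expanded: 1408)
 *	# echo function > /sys/kernel/tracing/current_tracer
 *	# cat /sys/kernel/tracing/buffer_size_kb
 *	1408
 *
 * The numbers are illustrative; they depend on trace_buf_size and the
 * kernel configuration.
 */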
6365 struct trace_option_dentry;
6366
6367 static void
6368 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6369
6370 /*
6371 * Used to clear out the tracer before deletion of an instance.
6372 * Must have trace_types_lock held.
6373 */
6374 static void tracing_set_nop(struct trace_array *tr)
6375 {
6376 if (tr->current_trace == &nop_trace)
6377 return;
6378
6379 tr->current_trace->enabled--;
6380
6381 if (tr->current_trace->reset)
6382 tr->current_trace->reset(tr);
6383
6384 tr->current_trace = &nop_trace;
6385 }
6386
6387 static bool tracer_options_updated;
6388
6389 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6390 {
6391 /* Only enable if the directory has been created already. */
6392 if (!tr->dir)
6393 return;
6394
6395 /* Only create trace option files after update_tracer_options finish */
6396 if (!tracer_options_updated)
6397 return;
6398
6399 create_trace_option_files(tr, t);
6400 }
6401
6402 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6403 {
6404 struct tracer *t;
6405 #ifdef CONFIG_TRACER_MAX_TRACE
6406 bool had_max_tr;
6407 #endif
6408 int ret = 0;
6409
6410 mutex_lock(&trace_types_lock);
6411
6412 if (!ring_buffer_expanded) {
6413 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6414 RING_BUFFER_ALL_CPUS);
6415 if (ret < 0)
6416 goto out;
6417 ret = 0;
6418 }
6419
6420 for (t = trace_types; t; t = t->next) {
6421 if (strcmp(t->name, buf) == 0)
6422 break;
6423 }
6424 if (!t) {
6425 ret = -EINVAL;
6426 goto out;
6427 }
6428 if (t == tr->current_trace)
6429 goto out;
6430
6431 #ifdef CONFIG_TRACER_SNAPSHOT
6432 if (t->use_max_tr) {
6433 local_irq_disable();
6434 arch_spin_lock(&tr->max_lock);
6435 if (tr->cond_snapshot)
6436 ret = -EBUSY;
6437 arch_spin_unlock(&tr->max_lock);
6438 local_irq_enable();
6439 if (ret)
6440 goto out;
6441 }
6442 #endif
6443 /* Some tracers won't work on kernel command line */
6444 if (system_state < SYSTEM_RUNNING && t->noboot) {
6445 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6446 t->name);
6447 goto out;
6448 }
6449
6450 /* Some tracers are only allowed for the top level buffer */
6451 if (!trace_ok_for_array(t, tr)) {
6452 ret = -EINVAL;
6453 goto out;
6454 }
6455
6456 /* If trace pipe files are being read, we can't change the tracer */
6457 if (tr->trace_ref) {
6458 ret = -EBUSY;
6459 goto out;
6460 }
6461
6462 trace_branch_disable();
6463
6464 tr->current_trace->enabled--;
6465
6466 if (tr->current_trace->reset)
6467 tr->current_trace->reset(tr);
6468
6469 #ifdef CONFIG_TRACER_MAX_TRACE
6470 had_max_tr = tr->current_trace->use_max_tr;
6471
6472 /* Current trace needs to be nop_trace before synchronize_rcu */
6473 tr->current_trace = &nop_trace;
6474
6475 if (had_max_tr && !t->use_max_tr) {
6476 /*
6477 * We need to make sure that the update_max_tr sees that
6478 * current_trace changed to nop_trace to keep it from
6479 * swapping the buffers after we resize it.
6480 * The update_max_tr is called with interrupts disabled,
6481 * so a synchronize_rcu() is sufficient.
6482 */
6483 synchronize_rcu();
6484 free_snapshot(tr);
6485 }
6486
6487 if (t->use_max_tr && !tr->allocated_snapshot) {
6488 ret = tracing_alloc_snapshot_instance(tr);
6489 if (ret < 0)
6490 goto out;
6491 }
6492 #else
6493 tr->current_trace = &nop_trace;
6494 #endif
6495
6496 if (t->init) {
6497 ret = tracer_init(t, tr);
6498 if (ret)
6499 goto out;
6500 }
6501
6502 tr->current_trace = t;
6503 tr->current_trace->enabled++;
6504 trace_branch_enable(tr);
6505 out:
6506 mutex_unlock(&trace_types_lock);
6507
6508 return ret;
6509 }
6510
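/*
 * tracing_set_tracer() is normally reached through the "current_tracer"
 * file (see tracing_set_trace_write() below).  A minimal user-space
 * sketch, assuming tracefs is mounted at /sys/kernel/tracing and the
 * requested tracer is compiled in:
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int set_current_tracer(const char *name)	// e.g. "function", "nop"
 *	{
 *		int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
 *		ssize_t ret;
 *
 *		if (fd < 0)
 *			return -1;
 *		ret = write(fd, name, strlen(name));
 *		close(fd);
 *		return ret < 0 ? -1 : 0;
 *	}
 */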
6511 static ssize_t
6512 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6513 size_t cnt, loff_t *ppos)
6514 {
6515 struct trace_array *tr = filp->private_data;
6516 char buf[MAX_TRACER_SIZE+1];
6517 int i;
6518 size_t ret;
6519 int err;
6520
6521 ret = cnt;
6522
6523 if (cnt > MAX_TRACER_SIZE)
6524 cnt = MAX_TRACER_SIZE;
6525
6526 if (copy_from_user(buf, ubuf, cnt))
6527 return -EFAULT;
6528
6529 buf[cnt] = 0;
6530
6531 /* strip ending whitespace. */
6532 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6533 buf[i] = 0;
6534
6535 err = tracing_set_tracer(tr, buf);
6536 if (err)
6537 return err;
6538
6539 *ppos += ret;
6540
6541 return ret;
6542 }
6543
6544 static ssize_t
6545 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6546 size_t cnt, loff_t *ppos)
6547 {
6548 char buf[64];
6549 int r;
6550
6551 r = snprintf(buf, sizeof(buf), "%ld\n",
6552 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6553 if (r > sizeof(buf))
6554 r = sizeof(buf);
6555 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6556 }
6557
6558 static ssize_t
6559 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6560 size_t cnt, loff_t *ppos)
6561 {
6562 unsigned long val;
6563 int ret;
6564
6565 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6566 if (ret)
6567 return ret;
6568
6569 *ptr = val * 1000;
6570
6571 return cnt;
6572 }
6573
6574 static ssize_t
6575 tracing_thresh_read(struct file *filp, char __user *ubuf,
6576 size_t cnt, loff_t *ppos)
6577 {
6578 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6579 }
6580
6581 static ssize_t
6582 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6583 size_t cnt, loff_t *ppos)
6584 {
6585 struct trace_array *tr = filp->private_data;
6586 int ret;
6587
6588 mutex_lock(&trace_types_lock);
6589 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6590 if (ret < 0)
6591 goto out;
6592
6593 if (tr->current_trace->update_thresh) {
6594 ret = tr->current_trace->update_thresh(tr);
6595 if (ret < 0)
6596 goto out;
6597 }
6598
6599 ret = cnt;
6600 out:
6601 mutex_unlock(&trace_types_lock);
6602
6603 return ret;
6604 }
6605
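/*
 * Units note: tracing_nsecs_write() above multiplies the written value
 * by 1000, so "tracing_thresh" (and "tracing_max_latency" below) are
 * written and read in microseconds while the kernel stores nanoseconds.
 * A small sketch, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static void set_tracing_thresh_100us(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/tracing_thresh", O_WRONLY);
 *
 *		if (fd < 0)
 *			return;
 *		write(fd, "100", 3);	// 100 usecs, stored as 100000 ns
 *		close(fd);
 *	}
 */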
6606 #ifdef CONFIG_TRACER_MAX_TRACE
6607
6608 static ssize_t
6609 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6610 size_t cnt, loff_t *ppos)
6611 {
6612 struct trace_array *tr = filp->private_data;
6613
6614 return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6615 }
6616
6617 static ssize_t
6618 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6619 size_t cnt, loff_t *ppos)
6620 {
6621 struct trace_array *tr = filp->private_data;
6622
6623 return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6624 }
6625
6626 #endif
6627
6628 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6629 {
6630 struct trace_array *tr = inode->i_private;
6631 struct trace_iterator *iter;
6632 int ret;
6633
6634 ret = tracing_check_open_get_tr(tr);
6635 if (ret)
6636 return ret;
6637
6638 mutex_lock(&trace_types_lock);
6639
6640 /* create a buffer to store the information to pass to userspace */
6641 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6642 if (!iter) {
6643 ret = -ENOMEM;
6644 __trace_array_put(tr);
6645 goto out;
6646 }
6647
6648 trace_seq_init(&iter->seq);
6649 iter->trace = tr->current_trace;
6650
6651 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6652 ret = -ENOMEM;
6653 goto fail;
6654 }
6655
6656 /* trace pipe does not show start of buffer */
6657 cpumask_setall(iter->started);
6658
6659 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6660 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6661
6662 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6663 if (trace_clocks[tr->clock_id].in_ns)
6664 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6665
6666 iter->tr = tr;
6667 iter->array_buffer = &tr->array_buffer;
6668 iter->cpu_file = tracing_get_cpu(inode);
6669 mutex_init(&iter->mutex);
6670 filp->private_data = iter;
6671
6672 if (iter->trace->pipe_open)
6673 iter->trace->pipe_open(iter);
6674
6675 nonseekable_open(inode, filp);
6676
6677 tr->trace_ref++;
6678 out:
6679 mutex_unlock(&trace_types_lock);
6680 return ret;
6681
6682 fail:
6683 kfree(iter);
6684 __trace_array_put(tr);
6685 mutex_unlock(&trace_types_lock);
6686 return ret;
6687 }
6688
6689 static int tracing_release_pipe(struct inode *inode, struct file *file)
6690 {
6691 struct trace_iterator *iter = file->private_data;
6692 struct trace_array *tr = inode->i_private;
6693
6694 mutex_lock(&trace_types_lock);
6695
6696 tr->trace_ref--;
6697
6698 if (iter->trace->pipe_close)
6699 iter->trace->pipe_close(iter);
6700
6701 mutex_unlock(&trace_types_lock);
6702
6703 free_cpumask_var(iter->started);
6704 kfree(iter->fmt);
6705 kfree(iter->temp);
6706 mutex_destroy(&iter->mutex);
6707 kfree(iter);
6708
6709 trace_array_put(tr);
6710
6711 return 0;
6712 }
6713
6714 static __poll_t
6715 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6716 {
6717 struct trace_array *tr = iter->tr;
6718
6719 /* Iterators are static, they should be filled or empty */
6720 if (trace_buffer_iter(iter, iter->cpu_file))
6721 return EPOLLIN | EPOLLRDNORM;
6722
6723 if (tr->trace_flags & TRACE_ITER_BLOCK)
6724 /*
6725 * Always select as readable when in blocking mode
6726 */
6727 return EPOLLIN | EPOLLRDNORM;
6728 else
6729 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6730 filp, poll_table, iter->tr->buffer_percent);
6731 }
6732
6733 static __poll_t
6734 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6735 {
6736 struct trace_iterator *iter = filp->private_data;
6737
6738 return trace_poll(iter, filp, poll_table);
6739 }
6740
6741 /* Must be called with iter->mutex held. */
6742 static int tracing_wait_pipe(struct file *filp)
6743 {
6744 struct trace_iterator *iter = filp->private_data;
6745 int ret;
6746
6747 while (trace_empty(iter)) {
6748
6749 if ((filp->f_flags & O_NONBLOCK)) {
6750 return -EAGAIN;
6751 }
6752
6753 /*
6754 * We block until we read something and tracing is disabled.
6755 * We still block if tracing is disabled, but we have never
6756 * read anything. This allows a user to cat this file, and
6757 * then enable tracing. But after we have read something,
6758 * we give an EOF when tracing is again disabled.
6759 *
6760 * iter->pos will be 0 if we haven't read anything.
6761 */
6762 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6763 break;
6764
6765 mutex_unlock(&iter->mutex);
6766
6767 ret = wait_on_pipe(iter, 0);
6768
6769 mutex_lock(&iter->mutex);
6770
6771 if (ret)
6772 return ret;
6773 }
6774
6775 return 1;
6776 }
6777
6778 /*
6779 * Consumer reader.
6780 */
6781 static ssize_t
6782 tracing_read_pipe(struct file *filp, char __user *ubuf,
6783 size_t cnt, loff_t *ppos)
6784 {
6785 struct trace_iterator *iter = filp->private_data;
6786 ssize_t sret;
6787
6788 /*
6789 * Avoid more than one consumer on a single file descriptor
6790 * This is just a matter of trace coherency, the ring buffer itself
6791 * is protected.
6792 */
6793 mutex_lock(&iter->mutex);
6794
6795 /* return any leftover data */
6796 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6797 if (sret != -EBUSY)
6798 goto out;
6799
6800 trace_seq_init(&iter->seq);
6801
6802 if (iter->trace->read) {
6803 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6804 if (sret)
6805 goto out;
6806 }
6807
6808 waitagain:
6809 sret = tracing_wait_pipe(filp);
6810 if (sret <= 0)
6811 goto out;
6812
6813 /* stop when tracing is finished */
6814 if (trace_empty(iter)) {
6815 sret = 0;
6816 goto out;
6817 }
6818
6819 if (cnt >= PAGE_SIZE)
6820 cnt = PAGE_SIZE - 1;
6821
6822 /* reset all but tr, trace, and overruns */
6823 memset(&iter->seq, 0,
6824 sizeof(struct trace_iterator) -
6825 offsetof(struct trace_iterator, seq));
6826 cpumask_clear(iter->started);
6827 trace_seq_init(&iter->seq);
6828 iter->pos = -1;
6829
6830 trace_event_read_lock();
6831 trace_access_lock(iter->cpu_file);
6832 while (trace_find_next_entry_inc(iter) != NULL) {
6833 enum print_line_t ret;
6834 int save_len = iter->seq.seq.len;
6835
6836 ret = print_trace_line(iter);
6837 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6838 /*
6839 * If one print_trace_line() fills the entire trace_seq in one shot,
6840 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6841 * In this case, we need to consume it, otherwise the loop will peek
6842 * this event next time, resulting in an infinite loop.
6843 */
6844 if (save_len == 0) {
6845 iter->seq.full = 0;
6846 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6847 trace_consume(iter);
6848 break;
6849 }
6850
6851 /* In other cases, don't print partial lines */
6852 iter->seq.seq.len = save_len;
6853 break;
6854 }
6855 if (ret != TRACE_TYPE_NO_CONSUME)
6856 trace_consume(iter);
6857
6858 if (trace_seq_used(&iter->seq) >= cnt)
6859 break;
6860
6861 /*
6862 * Setting the full flag means we reached the trace_seq buffer
6863 * size and we should leave by partial output condition above.
6864 * One of the trace_seq_* functions is not used properly.
6865 */
6866 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6867 iter->ent->type);
6868 }
6869 trace_access_unlock(iter->cpu_file);
6870 trace_event_read_unlock();
6871
6872 /* Now copy what we have to the user */
6873 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6874 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6875 trace_seq_init(&iter->seq);
6876
6877 /*
6878 * If there was nothing to send to user, in spite of consuming trace
6879 * entries, go back to wait for more entries.
6880 */
6881 if (sret == -EBUSY)
6882 goto waitagain;
6883
6884 out:
6885 mutex_unlock(&iter->mutex);
6886
6887 return sret;
6888 }
6889
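/*
 * A consuming-reader sketch for the "trace_pipe" file backed by
 * tracing_read_pipe() above, assuming tracefs is mounted at
 * /sys/kernel/tracing.  As described in tracing_wait_pipe(), the read
 * blocks until data is available (unless O_NONBLOCK is set), and
 * everything read here is consumed from the ring buffer.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[4096];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		while ((n = read(fd, buf, sizeof(buf))) > 0)	// blocks when empty
 *			fwrite(buf, 1, n, stdout);
 *		close(fd);
 *		return 0;
 *	}
 */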
6890 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6891 unsigned int idx)
6892 {
6893 __free_page(spd->pages[idx]);
6894 }
6895
6896 static size_t
6897 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6898 {
6899 size_t count;
6900 int save_len;
6901 int ret;
6902
6903 /* Seq buffer is page-sized, exactly what we need. */
6904 for (;;) {
6905 save_len = iter->seq.seq.len;
6906 ret = print_trace_line(iter);
6907
6908 if (trace_seq_has_overflowed(&iter->seq)) {
6909 iter->seq.seq.len = save_len;
6910 break;
6911 }
6912
6913 /*
6914 * This should not be hit, because it should only
6915 * be set if the iter->seq overflowed. But check it
6916 * anyway to be safe.
6917 */
6918 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6919 iter->seq.seq.len = save_len;
6920 break;
6921 }
6922
6923 count = trace_seq_used(&iter->seq) - save_len;
6924 if (rem < count) {
6925 rem = 0;
6926 iter->seq.seq.len = save_len;
6927 break;
6928 }
6929
6930 if (ret != TRACE_TYPE_NO_CONSUME)
6931 trace_consume(iter);
6932 rem -= count;
6933 if (!trace_find_next_entry_inc(iter)) {
6934 rem = 0;
6935 iter->ent = NULL;
6936 break;
6937 }
6938 }
6939
6940 return rem;
6941 }
6942
6943 static ssize_t tracing_splice_read_pipe(struct file *filp,
6944 loff_t *ppos,
6945 struct pipe_inode_info *pipe,
6946 size_t len,
6947 unsigned int flags)
6948 {
6949 struct page *pages_def[PIPE_DEF_BUFFERS];
6950 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6951 struct trace_iterator *iter = filp->private_data;
6952 struct splice_pipe_desc spd = {
6953 .pages = pages_def,
6954 .partial = partial_def,
6955 .nr_pages = 0, /* This gets updated below. */
6956 .nr_pages_max = PIPE_DEF_BUFFERS,
6957 .ops = &default_pipe_buf_ops,
6958 .spd_release = tracing_spd_release_pipe,
6959 };
6960 ssize_t ret;
6961 size_t rem;
6962 unsigned int i;
6963
6964 if (splice_grow_spd(pipe, &spd))
6965 return -ENOMEM;
6966
6967 mutex_lock(&iter->mutex);
6968
6969 if (iter->trace->splice_read) {
6970 ret = iter->trace->splice_read(iter, filp,
6971 ppos, pipe, len, flags);
6972 if (ret)
6973 goto out_err;
6974 }
6975
6976 ret = tracing_wait_pipe(filp);
6977 if (ret <= 0)
6978 goto out_err;
6979
6980 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6981 ret = -EFAULT;
6982 goto out_err;
6983 }
6984
6985 trace_event_read_lock();
6986 trace_access_lock(iter->cpu_file);
6987
6988 /* Fill as many pages as possible. */
6989 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6990 spd.pages[i] = alloc_page(GFP_KERNEL);
6991 if (!spd.pages[i])
6992 break;
6993
6994 rem = tracing_fill_pipe_page(rem, iter);
6995
6996 /* Copy the data into the page, so we can start over. */
6997 ret = trace_seq_to_buffer(&iter->seq,
6998 page_address(spd.pages[i]),
6999 trace_seq_used(&iter->seq));
7000 if (ret < 0) {
7001 __free_page(spd.pages[i]);
7002 break;
7003 }
7004 spd.partial[i].offset = 0;
7005 spd.partial[i].len = trace_seq_used(&iter->seq);
7006
7007 trace_seq_init(&iter->seq);
7008 }
7009
7010 trace_access_unlock(iter->cpu_file);
7011 trace_event_read_unlock();
7012 mutex_unlock(&iter->mutex);
7013
7014 spd.nr_pages = i;
7015
7016 if (i)
7017 ret = splice_to_pipe(pipe, &spd);
7018 else
7019 ret = 0;
7020 out:
7021 splice_shrink_spd(&spd);
7022 return ret;
7023
7024 out_err:
7025 mutex_unlock(&iter->mutex);
7026 goto out;
7027 }
7028
7029 static ssize_t
7030 tracing_entries_read(struct file *filp, char __user *ubuf,
7031 size_t cnt, loff_t *ppos)
7032 {
7033 struct inode *inode = file_inode(filp);
7034 struct trace_array *tr = inode->i_private;
7035 int cpu = tracing_get_cpu(inode);
7036 char buf[64];
7037 int r = 0;
7038 ssize_t ret;
7039
7040 mutex_lock(&trace_types_lock);
7041
7042 if (cpu == RING_BUFFER_ALL_CPUS) {
7043 int cpu, buf_size_same;
7044 unsigned long size;
7045
7046 size = 0;
7047 buf_size_same = 1;
7048 /* check if all cpu sizes are same */
7049 for_each_tracing_cpu(cpu) {
7050 /* fill in the size from first enabled cpu */
7051 if (size == 0)
7052 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7053 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7054 buf_size_same = 0;
7055 break;
7056 }
7057 }
7058
7059 if (buf_size_same) {
7060 if (!ring_buffer_expanded)
7061 r = sprintf(buf, "%lu (expanded: %lu)\n",
7062 size >> 10,
7063 trace_buf_size >> 10);
7064 else
7065 r = sprintf(buf, "%lu\n", size >> 10);
7066 } else
7067 r = sprintf(buf, "X\n");
7068 } else
7069 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7070
7071 mutex_unlock(&trace_types_lock);
7072
7073 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7074 return ret;
7075 }
7076
7077 static ssize_t
7078 tracing_entries_write(struct file *filp, const char __user *ubuf,
7079 size_t cnt, loff_t *ppos)
7080 {
7081 struct inode *inode = file_inode(filp);
7082 struct trace_array *tr = inode->i_private;
7083 unsigned long val;
7084 int ret;
7085
7086 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7087 if (ret)
7088 return ret;
7089
7090 /* must have at least 1 entry */
7091 if (!val)
7092 return -EINVAL;
7093
7094 /* value is in KB */
7095 val <<= 10;
7096 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7097 if (ret < 0)
7098 return ret;
7099
7100 *ppos += cnt;
7101
7102 return cnt;
7103 }
7104
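/*
 * tracing_entries_write() above backs the "buffer_size_kb" files: the
 * written value is taken in KiB (val <<= 10) and resizes either every
 * per-CPU buffer (top-level file) or one CPU's buffer
 * (per_cpu/cpuN/buffer_size_kb).  A sketch, assuming tracefs is
 * mounted at /sys/kernel/tracing:
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int set_buffer_size_kb(const char *path, const char *kbytes)
 *	{
 *		int fd = open(path, O_WRONLY);	// e.g. ".../buffer_size_kb"
 *		ssize_t ret;
 *
 *		if (fd < 0)
 *			return -1;
 *		ret = write(fd, kbytes, strlen(kbytes));	// e.g. "4096"
 *		close(fd);
 *		return ret < 0 ? -1 : 0;
 *	}
 */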
7105 static ssize_t
7106 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7107 size_t cnt, loff_t *ppos)
7108 {
7109 struct trace_array *tr = filp->private_data;
7110 char buf[64];
7111 int r, cpu;
7112 unsigned long size = 0, expanded_size = 0;
7113
7114 mutex_lock(&trace_types_lock);
7115 for_each_tracing_cpu(cpu) {
7116 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7117 if (!ring_buffer_expanded)
7118 expanded_size += trace_buf_size >> 10;
7119 }
7120 if (ring_buffer_expanded)
7121 r = sprintf(buf, "%lu\n", size);
7122 else
7123 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7124 mutex_unlock(&trace_types_lock);
7125
7126 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7127 }
7128
7129 static ssize_t
7130 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7131 size_t cnt, loff_t *ppos)
7132 {
7133 /*
7134 * There is no need to read what the user has written, this function
7135 * is just to make sure that there is no error when "echo" is used
7136 */
7137
7138 *ppos += cnt;
7139
7140 return cnt;
7141 }
7142
7143 static int
7144 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7145 {
7146 struct trace_array *tr = inode->i_private;
7147
7148 /* disable tracing ? */
7149 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7150 tracer_tracing_off(tr);
7151 /* resize the ring buffer to 0 */
7152 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7153
7154 trace_array_put(tr);
7155
7156 return 0;
7157 }
7158
7159 static ssize_t
7160 tracing_mark_write(struct file *filp, const char __user *ubuf,
7161 size_t cnt, loff_t *fpos)
7162 {
7163 struct trace_array *tr = filp->private_data;
7164 struct ring_buffer_event *event;
7165 enum event_trigger_type tt = ETT_NONE;
7166 struct trace_buffer *buffer;
7167 struct print_entry *entry;
7168 ssize_t written;
7169 int size;
7170 int len;
7171
7172 /* Used in tracing_mark_raw_write() as well */
7173 #define FAULTED_STR "<faulted>"
7174 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7175
7176 if (tracing_disabled)
7177 return -EINVAL;
7178
7179 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7180 return -EINVAL;
7181
7182 if (cnt > TRACE_BUF_SIZE)
7183 cnt = TRACE_BUF_SIZE;
7184
7185 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7186
7187 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7188
7189 /* If less than "<faulted>", then make sure we can still add that */
7190 if (cnt < FAULTED_SIZE)
7191 size += FAULTED_SIZE - cnt;
7192
7193 buffer = tr->array_buffer.buffer;
7194 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7195 tracing_gen_ctx());
7196 if (unlikely(!event))
7197 /* Ring buffer disabled, return as if not open for write */
7198 return -EBADF;
7199
7200 entry = ring_buffer_event_data(event);
7201 entry->ip = _THIS_IP_;
7202
7203 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7204 if (len) {
7205 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7206 cnt = FAULTED_SIZE;
7207 written = -EFAULT;
7208 } else
7209 written = cnt;
7210
7211 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7212 /* do not add \n before testing triggers, but add \0 */
7213 entry->buf[cnt] = '\0';
7214 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7215 }
7216
7217 if (entry->buf[cnt - 1] != '\n') {
7218 entry->buf[cnt] = '\n';
7219 entry->buf[cnt + 1] = '\0';
7220 } else
7221 entry->buf[cnt] = '\0';
7222
7223 if (static_branch_unlikely(&trace_marker_exports_enabled))
7224 ftrace_exports(event, TRACE_EXPORT_MARKER);
7225 __buffer_unlock_commit(buffer, event);
7226
7227 if (tt)
7228 event_triggers_post_call(tr->trace_marker_file, tt);
7229
7230 return written;
7231 }
7232
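/*
 * tracing_mark_write() above backs the "trace_marker" file from the
 * readme: the written string is stored in the ring buffer as a print
 * entry, with a newline appended when missing.  A minimal user-space
 * sketch, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static void trace_marker_write(const char *msg)	// e.g. "app: frame start"
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *		if (fd < 0)
 *			return;
 *		write(fd, msg, strlen(msg));
 *		close(fd);
 *	}
 */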
7233 /* Limit it for now to 3K (including tag) */
7234 #define RAW_DATA_MAX_SIZE (1024*3)
7235
7236 static ssize_t
7237 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7238 size_t cnt, loff_t *fpos)
7239 {
7240 struct trace_array *tr = filp->private_data;
7241 struct ring_buffer_event *event;
7242 struct trace_buffer *buffer;
7243 struct raw_data_entry *entry;
7244 ssize_t written;
7245 int size;
7246 int len;
7247
7248 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7249
7250 if (tracing_disabled)
7251 return -EINVAL;
7252
7253 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7254 return -EINVAL;
7255
7256 /* The marker must at least have a tag id */
7257 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7258 return -EINVAL;
7259
7260 if (cnt > TRACE_BUF_SIZE)
7261 cnt = TRACE_BUF_SIZE;
7262
7263 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7264
7265 size = sizeof(*entry) + cnt;
7266 if (cnt < FAULT_SIZE_ID)
7267 size += FAULT_SIZE_ID - cnt;
7268
7269 buffer = tr->array_buffer.buffer;
7270 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7271 tracing_gen_ctx());
7272 if (!event)
7273 /* Ring buffer disabled, return as if not open for write */
7274 return -EBADF;
7275
7276 entry = ring_buffer_event_data(event);
7277
7278 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7279 if (len) {
7280 entry->id = -1;
7281 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7282 written = -EFAULT;
7283 } else
7284 written = cnt;
7285
7286 __buffer_unlock_commit(buffer, event);
7287
7288 return written;
7289 }
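/*
 * Rough sketch of the payload this handler expects from the write side:
 * a leading four-byte tag followed by opaque data. The struct below is
 * purely illustrative, not a UAPI definition:
 *
 *	struct {
 *		unsigned int id;	// tag consumed as entry->id
 *		char payload[8];	// copied verbatim after the tag
 *	} raw = { .id = 42, .payload = "rawdata" };
 *
 *	write(fd, &raw, sizeof(raw));	// fd open on trace_marker_raw
 *
 * Anything shorter than sizeof(unsigned int) or larger than
 * RAW_DATA_MAX_SIZE is rejected with -EINVAL above.
 */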
7290
7291 static int tracing_clock_show(struct seq_file *m, void *v)
7292 {
7293 struct trace_array *tr = m->private;
7294 int i;
7295
7296 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7297 seq_printf(m,
7298 "%s%s%s%s", i ? " " : "",
7299 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7300 i == tr->clock_id ? "]" : "");
7301 seq_putc(m, '\n');
7302
7303 return 0;
7304 }
7305
7306 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7307 {
7308 int i;
7309
7310 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7311 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7312 break;
7313 }
7314 if (i == ARRAY_SIZE(trace_clocks))
7315 return -EINVAL;
7316
7317 mutex_lock(&trace_types_lock);
7318
7319 tr->clock_id = i;
7320
7321 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7322
7323 /*
7324 * New clock may not be consistent with the previous clock.
7325 * Reset the buffer so that it doesn't have incomparable timestamps.
7326 */
7327 tracing_reset_online_cpus(&tr->array_buffer);
7328
7329 #ifdef CONFIG_TRACER_MAX_TRACE
7330 if (tr->max_buffer.buffer)
7331 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7332 tracing_reset_online_cpus(&tr->max_buffer);
7333 #endif
7334
7335 mutex_unlock(&trace_types_lock);
7336
7337 return 0;
7338 }
7339
7340 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7341 size_t cnt, loff_t *fpos)
7342 {
7343 struct seq_file *m = filp->private_data;
7344 struct trace_array *tr = m->private;
7345 char buf[64];
7346 const char *clockstr;
7347 int ret;
7348
7349 if (cnt >= sizeof(buf))
7350 return -EINVAL;
7351
7352 if (copy_from_user(buf, ubuf, cnt))
7353 return -EFAULT;
7354
7355 buf[cnt] = 0;
7356
7357 clockstr = strstrip(buf);
7358
7359 ret = tracing_set_clock(tr, clockstr);
7360 if (ret)
7361 return ret;
7362
7363 *fpos += cnt;
7364
7365 return cnt;
7366 }
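/*
 * Typical use of this write handler via tracefs, e.g. from a shell
 * (path assumes the usual tracefs mount point):
 *
 *	echo global > /sys/kernel/tracing/trace_clock
 *
 * The stripped token is matched against trace_clocks[] by
 * tracing_set_clock(), and the buffers are reset because timestamps taken
 * with different clocks are not comparable.
 */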
7367
7368 static int tracing_clock_open(struct inode *inode, struct file *file)
7369 {
7370 struct trace_array *tr = inode->i_private;
7371 int ret;
7372
7373 ret = tracing_check_open_get_tr(tr);
7374 if (ret)
7375 return ret;
7376
7377 ret = single_open(file, tracing_clock_show, inode->i_private);
7378 if (ret < 0)
7379 trace_array_put(tr);
7380
7381 return ret;
7382 }
7383
7384 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7385 {
7386 struct trace_array *tr = m->private;
7387
7388 mutex_lock(&trace_types_lock);
7389
7390 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7391 seq_puts(m, "delta [absolute]\n");
7392 else
7393 seq_puts(m, "[delta] absolute\n");
7394
7395 mutex_unlock(&trace_types_lock);
7396
7397 return 0;
7398 }
7399
7400 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7401 {
7402 struct trace_array *tr = inode->i_private;
7403 int ret;
7404
7405 ret = tracing_check_open_get_tr(tr);
7406 if (ret)
7407 return ret;
7408
7409 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7410 if (ret < 0)
7411 trace_array_put(tr);
7412
7413 return ret;
7414 }
7415
7416 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7417 {
7418 if (rbe == this_cpu_read(trace_buffered_event))
7419 return ring_buffer_time_stamp(buffer);
7420
7421 return ring_buffer_event_time_stamp(buffer, rbe);
7422 }
7423
7424 /*
7425 * Enable or disable use of the per-CPU trace_buffered_event when possible.
7426 */
7427 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7428 {
7429 int ret = 0;
7430
7431 mutex_lock(&trace_types_lock);
7432
7433 if (set && tr->no_filter_buffering_ref++)
7434 goto out;
7435
7436 if (!set) {
7437 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7438 ret = -EINVAL;
7439 goto out;
7440 }
7441
7442 --tr->no_filter_buffering_ref;
7443 }
7444 out:
7445 mutex_unlock(&trace_types_lock);
7446
7447 return ret;
7448 }
7449
7450 struct ftrace_buffer_info {
7451 struct trace_iterator iter;
7452 void *spare;
7453 unsigned int spare_cpu;
7454 unsigned int read;
7455 };
7456
7457 #ifdef CONFIG_TRACER_SNAPSHOT
7458 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7459 {
7460 struct trace_array *tr = inode->i_private;
7461 struct trace_iterator *iter;
7462 struct seq_file *m;
7463 int ret;
7464
7465 ret = tracing_check_open_get_tr(tr);
7466 if (ret)
7467 return ret;
7468
7469 if (file->f_mode & FMODE_READ) {
7470 iter = __tracing_open(inode, file, true);
7471 if (IS_ERR(iter))
7472 ret = PTR_ERR(iter);
7473 } else {
7474 /* Writes still need the seq_file to hold the private data */
7475 ret = -ENOMEM;
7476 m = kzalloc(sizeof(*m), GFP_KERNEL);
7477 if (!m)
7478 goto out;
7479 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7480 if (!iter) {
7481 kfree(m);
7482 goto out;
7483 }
7484 ret = 0;
7485
7486 iter->tr = tr;
7487 iter->array_buffer = &tr->max_buffer;
7488 iter->cpu_file = tracing_get_cpu(inode);
7489 m->private = iter;
7490 file->private_data = m;
7491 }
7492 out:
7493 if (ret < 0)
7494 trace_array_put(tr);
7495
7496 return ret;
7497 }
7498
7499 static void tracing_swap_cpu_buffer(void *tr)
7500 {
7501 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7502 }
7503
7504 static ssize_t
7505 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7506 loff_t *ppos)
7507 {
7508 struct seq_file *m = filp->private_data;
7509 struct trace_iterator *iter = m->private;
7510 struct trace_array *tr = iter->tr;
7511 unsigned long val;
7512 int ret;
7513
7514 ret = tracing_update_buffers();
7515 if (ret < 0)
7516 return ret;
7517
7518 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7519 if (ret)
7520 return ret;
7521
7522 mutex_lock(&trace_types_lock);
7523
7524 if (tr->current_trace->use_max_tr) {
7525 ret = -EBUSY;
7526 goto out;
7527 }
7528
7529 local_irq_disable();
7530 arch_spin_lock(&tr->max_lock);
7531 if (tr->cond_snapshot)
7532 ret = -EBUSY;
7533 arch_spin_unlock(&tr->max_lock);
7534 local_irq_enable();
7535 if (ret)
7536 goto out;
7537
7538 switch (val) {
7539 case 0:
7540 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7541 ret = -EINVAL;
7542 break;
7543 }
7544 if (tr->allocated_snapshot)
7545 free_snapshot(tr);
7546 break;
7547 case 1:
7548 /* Only allow per-cpu swap if the ring buffer supports it */
7549 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7550 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7551 ret = -EINVAL;
7552 break;
7553 }
7554 #endif
7555 if (tr->allocated_snapshot)
7556 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7557 &tr->array_buffer, iter->cpu_file);
7558 else
7559 ret = tracing_alloc_snapshot_instance(tr);
7560 if (ret < 0)
7561 break;
7562 /* Now, we're going to swap */
7563 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7564 local_irq_disable();
7565 update_max_tr(tr, current, smp_processor_id(), NULL);
7566 local_irq_enable();
7567 } else {
7568 smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7569 (void *)tr, 1);
7570 }
7571 break;
7572 default:
7573 if (tr->allocated_snapshot) {
7574 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7575 tracing_reset_online_cpus(&tr->max_buffer);
7576 else
7577 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7578 }
7579 break;
7580 }
7581
7582 if (ret >= 0) {
7583 *ppos += cnt;
7584 ret = cnt;
7585 }
7586 out:
7587 mutex_unlock(&trace_types_lock);
7588 return ret;
7589 }
7590
7591 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7592 {
7593 struct seq_file *m = file->private_data;
7594 int ret;
7595
7596 ret = tracing_release(inode, file);
7597
7598 if (file->f_mode & FMODE_READ)
7599 return ret;
7600
7601 /* If write only, the seq_file is just a stub */
7602 if (m)
7603 kfree(m->private);
7604 kfree(m);
7605
7606 return 0;
7607 }
7608
7609 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7610 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7611 size_t count, loff_t *ppos);
7612 static int tracing_buffers_release(struct inode *inode, struct file *file);
7613 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7614 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7615
7616 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7617 {
7618 struct ftrace_buffer_info *info;
7619 int ret;
7620
7621 /* The following checks for tracefs lockdown */
7622 ret = tracing_buffers_open(inode, filp);
7623 if (ret < 0)
7624 return ret;
7625
7626 info = filp->private_data;
7627
7628 if (info->iter.trace->use_max_tr) {
7629 tracing_buffers_release(inode, filp);
7630 return -EBUSY;
7631 }
7632
7633 info->iter.snapshot = true;
7634 info->iter.array_buffer = &info->iter.tr->max_buffer;
7635
7636 return ret;
7637 }
7638
7639 #endif /* CONFIG_TRACER_SNAPSHOT */
7640
7641
7642 static const struct file_operations tracing_thresh_fops = {
7643 .open = tracing_open_generic,
7644 .read = tracing_thresh_read,
7645 .write = tracing_thresh_write,
7646 .llseek = generic_file_llseek,
7647 };
7648
7649 #ifdef CONFIG_TRACER_MAX_TRACE
7650 static const struct file_operations tracing_max_lat_fops = {
7651 .open = tracing_open_generic_tr,
7652 .read = tracing_max_lat_read,
7653 .write = tracing_max_lat_write,
7654 .llseek = generic_file_llseek,
7655 .release = tracing_release_generic_tr,
7656 };
7657 #endif
7658
7659 static const struct file_operations set_tracer_fops = {
7660 .open = tracing_open_generic_tr,
7661 .read = tracing_set_trace_read,
7662 .write = tracing_set_trace_write,
7663 .llseek = generic_file_llseek,
7664 .release = tracing_release_generic_tr,
7665 };
7666
7667 static const struct file_operations tracing_pipe_fops = {
7668 .open = tracing_open_pipe,
7669 .poll = tracing_poll_pipe,
7670 .read = tracing_read_pipe,
7671 .splice_read = tracing_splice_read_pipe,
7672 .release = tracing_release_pipe,
7673 .llseek = no_llseek,
7674 };
7675
7676 static const struct file_operations tracing_entries_fops = {
7677 .open = tracing_open_generic_tr,
7678 .read = tracing_entries_read,
7679 .write = tracing_entries_write,
7680 .llseek = generic_file_llseek,
7681 .release = tracing_release_generic_tr,
7682 };
7683
7684 static const struct file_operations tracing_total_entries_fops = {
7685 .open = tracing_open_generic_tr,
7686 .read = tracing_total_entries_read,
7687 .llseek = generic_file_llseek,
7688 .release = tracing_release_generic_tr,
7689 };
7690
7691 static const struct file_operations tracing_free_buffer_fops = {
7692 .open = tracing_open_generic_tr,
7693 .write = tracing_free_buffer_write,
7694 .release = tracing_free_buffer_release,
7695 };
7696
7697 static const struct file_operations tracing_mark_fops = {
7698 .open = tracing_mark_open,
7699 .write = tracing_mark_write,
7700 .release = tracing_release_generic_tr,
7701 };
7702
7703 static const struct file_operations tracing_mark_raw_fops = {
7704 .open = tracing_mark_open,
7705 .write = tracing_mark_raw_write,
7706 .release = tracing_release_generic_tr,
7707 };
7708
7709 static const struct file_operations trace_clock_fops = {
7710 .open = tracing_clock_open,
7711 .read = seq_read,
7712 .llseek = seq_lseek,
7713 .release = tracing_single_release_tr,
7714 .write = tracing_clock_write,
7715 };
7716
7717 static const struct file_operations trace_time_stamp_mode_fops = {
7718 .open = tracing_time_stamp_mode_open,
7719 .read = seq_read,
7720 .llseek = seq_lseek,
7721 .release = tracing_single_release_tr,
7722 };
7723
7724 #ifdef CONFIG_TRACER_SNAPSHOT
7725 static const struct file_operations snapshot_fops = {
7726 .open = tracing_snapshot_open,
7727 .read = seq_read,
7728 .write = tracing_snapshot_write,
7729 .llseek = tracing_lseek,
7730 .release = tracing_snapshot_release,
7731 };
7732
7733 static const struct file_operations snapshot_raw_fops = {
7734 .open = snapshot_raw_open,
7735 .read = tracing_buffers_read,
7736 .release = tracing_buffers_release,
7737 .splice_read = tracing_buffers_splice_read,
7738 .llseek = no_llseek,
7739 };
7740
7741 #endif /* CONFIG_TRACER_SNAPSHOT */
7742
7743 /*
7744 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7745 * @filp: The active open file structure
7746 * @ubuf: The user space provided buffer holding the value to write
7747 * @cnt: The maximum number of bytes to read
7748 * @ppos: The current "file" position
7749 *
7750 * This function implements the write interface for a struct trace_min_max_param.
7751 * The filp->private_data must point to a trace_min_max_param structure that
7752 * defines where to write the value, the min and the max acceptable values,
7753 * and a lock to protect the write.
7754 */
7755 static ssize_t
7756 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7757 {
7758 struct trace_min_max_param *param = filp->private_data;
7759 u64 val;
7760 int err;
7761
7762 if (!param)
7763 return -EFAULT;
7764
7765 err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7766 if (err)
7767 return err;
7768
7769 if (param->lock)
7770 mutex_lock(param->lock);
7771
7772 if (param->min && val < *param->min)
7773 err = -EINVAL;
7774
7775 if (param->max && val > *param->max)
7776 err = -EINVAL;
7777
7778 if (!err)
7779 *param->val = val;
7780
7781 if (param->lock)
7782 mutex_unlock(param->lock);
7783
7784 if (err)
7785 return err;
7786
7787 return cnt;
7788 }
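/*
 * A minimal sketch of wiring a knob to these handlers. The names
 * (my_knob, my_val, my_parent) are hypothetical; only the
 * trace_min_max_param layout and trace_min_max_fops come from this file:
 *
 *	static u64 my_val, my_min = 1, my_max = 100;
 *	static DEFINE_MUTEX(my_lock);
 *	static struct trace_min_max_param my_knob = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_knob", TRACE_MODE_WRITE, my_parent,
 *			  &my_knob, &trace_min_max_fops);
 *
 * Writes outside [*min, *max] are rejected with -EINVAL; reads return the
 * current value followed by a newline.
 */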
7789
7790 /*
7791 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7792 * @filp: The active open file structure
7793 * @ubuf: The userspace provided buffer to read value into
7794 * @cnt: The maximum number of bytes to read
7795 * @ppos: The current "file" position
7796 *
7797 * This function implements the read interface for a struct trace_min_max_param.
7798 * The filp->private_data must point to a trace_min_max_param struct with valid
7799 * data.
7800 */
7801 static ssize_t
7802 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7803 {
7804 struct trace_min_max_param *param = filp->private_data;
7805 char buf[U64_STR_SIZE];
7806 int len;
7807 u64 val;
7808
7809 if (!param)
7810 return -EFAULT;
7811
7812 val = *param->val;
7813
7814 if (cnt > sizeof(buf))
7815 cnt = sizeof(buf);
7816
7817 len = snprintf(buf, sizeof(buf), "%llu\n", val);
7818
7819 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7820 }
7821
7822 const struct file_operations trace_min_max_fops = {
7823 .open = tracing_open_generic,
7824 .read = trace_min_max_read,
7825 .write = trace_min_max_write,
7826 };
7827
7828 #define TRACING_LOG_ERRS_MAX 8
7829 #define TRACING_LOG_LOC_MAX 128
7830
7831 #define CMD_PREFIX " Command: "
7832
7833 struct err_info {
7834 const char **errs; /* ptr to loc-specific array of err strings */
7835 u8 type; /* index into errs -> specific err string */
7836 u8 pos; /* MAX_FILTER_STR_VAL = 256 */
7837 u64 ts;
7838 };
7839
7840 struct tracing_log_err {
7841 struct list_head list;
7842 struct err_info info;
7843 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7844 char cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7845 };
7846
7847 static DEFINE_MUTEX(tracing_err_log_lock);
7848
7849 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7850 {
7851 struct tracing_log_err *err;
7852
7853 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7854 err = kzalloc(sizeof(*err), GFP_KERNEL);
7855 if (!err)
7856 err = ERR_PTR(-ENOMEM);
7857 else
7858 tr->n_err_log_entries++;
7859
7860 return err;
7861 }
7862
7863 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7864 list_del(&err->list);
7865
7866 return err;
7867 }
7868
7869 /**
7870 * err_pos - find the position of a string within a command for error careting
7871 * @cmd: The tracing command that caused the error
7872 * @str: The string to position the caret at within @cmd
7873 *
7874 * Finds the position of the first occurrence of @str within @cmd. The
7875 * return value can be passed to tracing_log_err() for caret placement
7876 * within @cmd.
7877 *
7878 * Returns the index within @cmd of the first occurrence of @str or 0
7879 * if @str was not found.
7880 */
7881 unsigned int err_pos(char *cmd, const char *str)
7882 {
7883 char *found;
7884
7885 if (WARN_ON(!strlen(cmd)))
7886 return 0;
7887
7888 found = strstr(cmd, str);
7889 if (found)
7890 return found - cmd;
7891
7892 return 0;
7893 }
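/*
 * Worked example, using an illustrative hist-style command string:
 *
 *	err_pos("hist:keys=common_pid", "keys") returns 5, since "keys"
 *	starts five characters into the command; passing that value as
 *	@pos to tracing_log_err() places the caret under the 'k'.
 *	If "keys" did not occur, 0 would be returned and the caret would
 *	sit under the first character of the command.
 */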
7894
7895 /**
7896 * tracing_log_err - write an error to the tracing error log
7897 * @tr: The associated trace array for the error (NULL for top level array)
7898 * @loc: A string describing where the error occurred
7899 * @cmd: The tracing command that caused the error
7900 * @errs: The array of loc-specific static error strings
7901 * @type: The index into errs[], which produces the specific static err string
7902 * @pos: The position the caret should be placed in the cmd
7903 *
7904 * Writes an error into tracing/error_log of the form:
7905 *
7906 * <loc>: error: <text>
7907 * Command: <cmd>
7908 * ^
7909 *
7910 * tracing/error_log is a small log file containing the last
7911 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7912 * unless there has been a tracing error, and the error log can be
7913 * cleared and have its memory freed by writing the empty string in
7914 * truncation mode to it i.e. echo > tracing/error_log.
7915 *
7916 * NOTE: the @errs array along with the @type param are used to
7917 * produce a static error string - this string is not copied and saved
7918 * when the error is logged - only a pointer to it is saved. See
7919 * existing callers for examples of how static strings are typically
7920 * defined for use with tracing_log_err().
7921 */
7922 void tracing_log_err(struct trace_array *tr,
7923 const char *loc, const char *cmd,
7924 const char **errs, u8 type, u8 pos)
7925 {
7926 struct tracing_log_err *err;
7927
7928 if (!tr)
7929 tr = &global_trace;
7930
7931 mutex_lock(&tracing_err_log_lock);
7932 err = get_tracing_log_err(tr);
7933 if (PTR_ERR(err) == -ENOMEM) {
7934 mutex_unlock(&tracing_err_log_lock);
7935 return;
7936 }
7937
7938 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7939 snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7940
7941 err->info.errs = errs;
7942 err->info.type = type;
7943 err->info.pos = pos;
7944 err->info.ts = local_clock();
7945
7946 list_add_tail(&err->list, &tr->err_log);
7947 mutex_unlock(&tracing_err_log_lock);
7948 }
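/*
 * A hedged sketch of a caller, in the style the comment above refers to.
 * The array contents and the "my_cmd" string are made up for illustration;
 * real callers define their own static tables:
 *
 *	static const char *my_errs[] = {
 *		"Missing argument",		// index 0
 *		"Argument out of range",	// index 1
 *	};
 *
 *	tracing_log_err(tr, "my_subsys", my_cmd, my_errs,
 *			1, err_pos(my_cmd, "bad_token"));
 *
 * Only the errs pointer and the index are stored, so the strings must
 * remain valid (i.e. be static).
 */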
7949
7950 static void clear_tracing_err_log(struct trace_array *tr)
7951 {
7952 struct tracing_log_err *err, *next;
7953
7954 mutex_lock(&tracing_err_log_lock);
7955 list_for_each_entry_safe(err, next, &tr->err_log, list) {
7956 list_del(&err->list);
7957 kfree(err);
7958 }
7959
7960 tr->n_err_log_entries = 0;
7961 mutex_unlock(&tracing_err_log_lock);
7962 }
7963
7964 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7965 {
7966 struct trace_array *tr = m->private;
7967
7968 mutex_lock(&tracing_err_log_lock);
7969
7970 return seq_list_start(&tr->err_log, *pos);
7971 }
7972
7973 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7974 {
7975 struct trace_array *tr = m->private;
7976
7977 return seq_list_next(v, &tr->err_log, pos);
7978 }
7979
7980 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7981 {
7982 mutex_unlock(&tracing_err_log_lock);
7983 }
7984
7985 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7986 {
7987 u8 i;
7988
7989 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7990 seq_putc(m, ' ');
7991 for (i = 0; i < pos; i++)
7992 seq_putc(m, ' ');
7993 seq_puts(m, "^\n");
7994 }
7995
7996 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7997 {
7998 struct tracing_log_err *err = v;
7999
8000 if (err) {
8001 const char *err_text = err->info.errs[err->info.type];
8002 u64 sec = err->info.ts;
8003 u32 nsec;
8004
8005 nsec = do_div(sec, NSEC_PER_SEC);
8006 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8007 err->loc, err_text);
8008 seq_printf(m, "%s", err->cmd);
8009 tracing_err_log_show_pos(m, err->info.pos);
8010 }
8011
8012 return 0;
8013 }
8014
8015 static const struct seq_operations tracing_err_log_seq_ops = {
8016 .start = tracing_err_log_seq_start,
8017 .next = tracing_err_log_seq_next,
8018 .stop = tracing_err_log_seq_stop,
8019 .show = tracing_err_log_seq_show
8020 };
8021
8022 static int tracing_err_log_open(struct inode *inode, struct file *file)
8023 {
8024 struct trace_array *tr = inode->i_private;
8025 int ret = 0;
8026
8027 ret = tracing_check_open_get_tr(tr);
8028 if (ret)
8029 return ret;
8030
8031 /* If this file was opened for write, then erase contents */
8032 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8033 clear_tracing_err_log(tr);
8034
8035 if (file->f_mode & FMODE_READ) {
8036 ret = seq_open(file, &tracing_err_log_seq_ops);
8037 if (!ret) {
8038 struct seq_file *m = file->private_data;
8039 m->private = tr;
8040 } else {
8041 trace_array_put(tr);
8042 }
8043 }
8044 return ret;
8045 }
8046
8047 static ssize_t tracing_err_log_write(struct file *file,
8048 const char __user *buffer,
8049 size_t count, loff_t *ppos)
8050 {
8051 return count;
8052 }
8053
8054 static int tracing_err_log_release(struct inode *inode, struct file *file)
8055 {
8056 struct trace_array *tr = inode->i_private;
8057
8058 trace_array_put(tr);
8059
8060 if (file->f_mode & FMODE_READ)
8061 seq_release(inode, file);
8062
8063 return 0;
8064 }
8065
8066 static const struct file_operations tracing_err_log_fops = {
8067 .open = tracing_err_log_open,
8068 .write = tracing_err_log_write,
8069 .read = seq_read,
8070 .llseek = tracing_lseek,
8071 .release = tracing_err_log_release,
8072 };
8073
8074 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8075 {
8076 struct trace_array *tr = inode->i_private;
8077 struct ftrace_buffer_info *info;
8078 int ret;
8079
8080 ret = tracing_check_open_get_tr(tr);
8081 if (ret)
8082 return ret;
8083
8084 info = kvzalloc(sizeof(*info), GFP_KERNEL);
8085 if (!info) {
8086 trace_array_put(tr);
8087 return -ENOMEM;
8088 }
8089
8090 mutex_lock(&trace_types_lock);
8091
8092 info->iter.tr = tr;
8093 info->iter.cpu_file = tracing_get_cpu(inode);
8094 info->iter.trace = tr->current_trace;
8095 info->iter.array_buffer = &tr->array_buffer;
8096 info->spare = NULL;
8097 /* Force reading ring buffer for first read */
8098 info->read = (unsigned int)-1;
8099
8100 filp->private_data = info;
8101
8102 tr->trace_ref++;
8103
8104 mutex_unlock(&trace_types_lock);
8105
8106 ret = nonseekable_open(inode, filp);
8107 if (ret < 0)
8108 trace_array_put(tr);
8109
8110 return ret;
8111 }
8112
8113 static __poll_t
8114 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8115 {
8116 struct ftrace_buffer_info *info = filp->private_data;
8117 struct trace_iterator *iter = &info->iter;
8118
8119 return trace_poll(iter, filp, poll_table);
8120 }
8121
8122 static ssize_t
8123 tracing_buffers_read(struct file *filp, char __user *ubuf,
8124 size_t count, loff_t *ppos)
8125 {
8126 struct ftrace_buffer_info *info = filp->private_data;
8127 struct trace_iterator *iter = &info->iter;
8128 ssize_t ret = 0;
8129 ssize_t size;
8130
8131 if (!count)
8132 return 0;
8133
8134 #ifdef CONFIG_TRACER_MAX_TRACE
8135 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8136 return -EBUSY;
8137 #endif
8138
8139 if (!info->spare) {
8140 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8141 iter->cpu_file);
8142 if (IS_ERR(info->spare)) {
8143 ret = PTR_ERR(info->spare);
8144 info->spare = NULL;
8145 } else {
8146 info->spare_cpu = iter->cpu_file;
8147 }
8148 }
8149 if (!info->spare)
8150 return ret;
8151
8152 /* Do we have previous read data to read? */
8153 if (info->read < PAGE_SIZE)
8154 goto read;
8155
8156 again:
8157 trace_access_lock(iter->cpu_file);
8158 ret = ring_buffer_read_page(iter->array_buffer->buffer,
8159 &info->spare,
8160 count,
8161 iter->cpu_file, 0);
8162 trace_access_unlock(iter->cpu_file);
8163
8164 if (ret < 0) {
8165 if (trace_empty(iter)) {
8166 if ((filp->f_flags & O_NONBLOCK))
8167 return -EAGAIN;
8168
8169 ret = wait_on_pipe(iter, 0);
8170 if (ret)
8171 return ret;
8172
8173 goto again;
8174 }
8175 return 0;
8176 }
8177
8178 info->read = 0;
8179 read:
8180 size = PAGE_SIZE - info->read;
8181 if (size > count)
8182 size = count;
8183
8184 ret = copy_to_user(ubuf, info->spare + info->read, size);
8185 if (ret == size)
8186 return -EFAULT;
8187
8188 size -= ret;
8189
8190 *ppos += size;
8191 info->read += size;
8192
8193 return size;
8194 }
8195
8196 static int tracing_buffers_release(struct inode *inode, struct file *file)
8197 {
8198 struct ftrace_buffer_info *info = file->private_data;
8199 struct trace_iterator *iter = &info->iter;
8200
8201 mutex_lock(&trace_types_lock);
8202
8203 iter->tr->trace_ref--;
8204
8205 __trace_array_put(iter->tr);
8206
8207 iter->wait_index++;
8208 /* Make sure the waiters see the new wait_index */
8209 smp_wmb();
8210
8211 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8212
8213 if (info->spare)
8214 ring_buffer_free_read_page(iter->array_buffer->buffer,
8215 info->spare_cpu, info->spare);
8216 kvfree(info);
8217
8218 mutex_unlock(&trace_types_lock);
8219
8220 return 0;
8221 }
8222
8223 struct buffer_ref {
8224 struct trace_buffer *buffer;
8225 void *page;
8226 int cpu;
8227 refcount_t refcount;
8228 };
8229
8230 static void buffer_ref_release(struct buffer_ref *ref)
8231 {
8232 if (!refcount_dec_and_test(&ref->refcount))
8233 return;
8234 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8235 kfree(ref);
8236 }
8237
8238 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8239 struct pipe_buffer *buf)
8240 {
8241 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8242
8243 buffer_ref_release(ref);
8244 buf->private = 0;
8245 }
8246
8247 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8248 struct pipe_buffer *buf)
8249 {
8250 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8251
8252 if (refcount_read(&ref->refcount) > INT_MAX/2)
8253 return false;
8254
8255 refcount_inc(&ref->refcount);
8256 return true;
8257 }
8258
8259 /* Pipe buffer operations for a buffer. */
8260 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8261 .release = buffer_pipe_buf_release,
8262 .get = buffer_pipe_buf_get,
8263 };
8264
8265 /*
8266 * Callback from splice_to_pipe(), if we need to release some pages
8267 * at the end of the spd in case we error'ed out in filling the pipe.
8268 */
8269 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8270 {
8271 struct buffer_ref *ref =
8272 (struct buffer_ref *)spd->partial[i].private;
8273
8274 buffer_ref_release(ref);
8275 spd->partial[i].private = 0;
8276 }
8277
8278 static ssize_t
8279 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8280 struct pipe_inode_info *pipe, size_t len,
8281 unsigned int flags)
8282 {
8283 struct ftrace_buffer_info *info = file->private_data;
8284 struct trace_iterator *iter = &info->iter;
8285 struct partial_page partial_def[PIPE_DEF_BUFFERS];
8286 struct page *pages_def[PIPE_DEF_BUFFERS];
8287 struct splice_pipe_desc spd = {
8288 .pages = pages_def,
8289 .partial = partial_def,
8290 .nr_pages_max = PIPE_DEF_BUFFERS,
8291 .ops = &buffer_pipe_buf_ops,
8292 .spd_release = buffer_spd_release,
8293 };
8294 struct buffer_ref *ref;
8295 int entries, i;
8296 ssize_t ret = 0;
8297
8298 #ifdef CONFIG_TRACER_MAX_TRACE
8299 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8300 return -EBUSY;
8301 #endif
8302
8303 if (*ppos & (PAGE_SIZE - 1))
8304 return -EINVAL;
8305
8306 if (len & (PAGE_SIZE - 1)) {
8307 if (len < PAGE_SIZE)
8308 return -EINVAL;
8309 len &= PAGE_MASK;
8310 }
8311
8312 if (splice_grow_spd(pipe, &spd))
8313 return -ENOMEM;
8314
8315 again:
8316 trace_access_lock(iter->cpu_file);
8317 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8318
8319 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8320 struct page *page;
8321 int r;
8322
8323 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8324 if (!ref) {
8325 ret = -ENOMEM;
8326 break;
8327 }
8328
8329 refcount_set(&ref->refcount, 1);
8330 ref->buffer = iter->array_buffer->buffer;
8331 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8332 if (IS_ERR(ref->page)) {
8333 ret = PTR_ERR(ref->page);
8334 ref->page = NULL;
8335 kfree(ref);
8336 break;
8337 }
8338 ref->cpu = iter->cpu_file;
8339
8340 r = ring_buffer_read_page(ref->buffer, &ref->page,
8341 len, iter->cpu_file, 1);
8342 if (r < 0) {
8343 ring_buffer_free_read_page(ref->buffer, ref->cpu,
8344 ref->page);
8345 kfree(ref);
8346 break;
8347 }
8348
8349 page = virt_to_page(ref->page);
8350
8351 spd.pages[i] = page;
8352 spd.partial[i].len = PAGE_SIZE;
8353 spd.partial[i].offset = 0;
8354 spd.partial[i].private = (unsigned long)ref;
8355 spd.nr_pages++;
8356 *ppos += PAGE_SIZE;
8357
8358 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8359 }
8360
8361 trace_access_unlock(iter->cpu_file);
8362 spd.nr_pages = i;
8363
8364 /* did we read anything? */
8365 if (!spd.nr_pages) {
8366 long wait_index;
8367
8368 if (ret)
8369 goto out;
8370
8371 ret = -EAGAIN;
8372 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8373 goto out;
8374
8375 wait_index = READ_ONCE(iter->wait_index);
8376
8377 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8378 if (ret)
8379 goto out;
8380
8381 /* No need to wait after waking up when tracing is off */
8382 if (!tracer_tracing_is_on(iter->tr))
8383 goto out;
8384
8385 /* Make sure we see the new wait_index */
8386 smp_rmb();
8387 if (wait_index != iter->wait_index)
8388 goto out;
8389
8390 goto again;
8391 }
8392
8393 ret = splice_to_pipe(pipe, &spd);
8394 out:
8395 splice_shrink_spd(&spd);
8396
8397 return ret;
8398 }
8399
8400 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8401 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8402 {
8403 struct ftrace_buffer_info *info = file->private_data;
8404 struct trace_iterator *iter = &info->iter;
8405
8406 if (cmd)
8407 return -ENOIOCTLCMD;
8408
8409 mutex_lock(&trace_types_lock);
8410
8411 iter->wait_index++;
8412 /* Make sure the waiters see the new wait_index */
8413 smp_wmb();
8414
8415 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8416
8417 mutex_unlock(&trace_types_lock);
8418 return 0;
8419 }
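/*
 * From user space this is reachable with a plain ioctl on a trace_pipe_raw
 * file descriptor; the sketch below is illustrative (error handling and the
 * exact path are assumptions, not defined here):
 *
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *	ioctl(fd, 0);	// cmd 0: wake up any readers blocked on this buffer
 *
 * Any non-zero cmd falls through to -ENOIOCTLCMD above.
 */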
8420
8421 static const struct file_operations tracing_buffers_fops = {
8422 .open = tracing_buffers_open,
8423 .read = tracing_buffers_read,
8424 .poll = tracing_buffers_poll,
8425 .release = tracing_buffers_release,
8426 .splice_read = tracing_buffers_splice_read,
8427 .unlocked_ioctl = tracing_buffers_ioctl,
8428 .llseek = no_llseek,
8429 };
8430
8431 static ssize_t
8432 tracing_stats_read(struct file *filp, char __user *ubuf,
8433 size_t count, loff_t *ppos)
8434 {
8435 struct inode *inode = file_inode(filp);
8436 struct trace_array *tr = inode->i_private;
8437 struct array_buffer *trace_buf = &tr->array_buffer;
8438 int cpu = tracing_get_cpu(inode);
8439 struct trace_seq *s;
8440 unsigned long cnt;
8441 unsigned long long t;
8442 unsigned long usec_rem;
8443
8444 s = kmalloc(sizeof(*s), GFP_KERNEL);
8445 if (!s)
8446 return -ENOMEM;
8447
8448 trace_seq_init(s);
8449
8450 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8451 trace_seq_printf(s, "entries: %ld\n", cnt);
8452
8453 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8454 trace_seq_printf(s, "overrun: %ld\n", cnt);
8455
8456 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8457 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8458
8459 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8460 trace_seq_printf(s, "bytes: %ld\n", cnt);
8461
8462 if (trace_clocks[tr->clock_id].in_ns) {
8463 /* local or global for trace_clock */
8464 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8465 usec_rem = do_div(t, USEC_PER_SEC);
8466 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8467 t, usec_rem);
8468
8469 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8470 usec_rem = do_div(t, USEC_PER_SEC);
8471 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8472 } else {
8473 /* counter or tsc mode for trace_clock */
8474 trace_seq_printf(s, "oldest event ts: %llu\n",
8475 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8476
8477 trace_seq_printf(s, "now ts: %llu\n",
8478 ring_buffer_time_stamp(trace_buf->buffer));
8479 }
8480
8481 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8482 trace_seq_printf(s, "dropped events: %ld\n", cnt);
8483
8484 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8485 trace_seq_printf(s, "read events: %ld\n", cnt);
8486
8487 count = simple_read_from_buffer(ubuf, count, ppos,
8488 s->buffer, trace_seq_used(s));
8489
8490 kfree(s);
8491
8492 return count;
8493 }
8494
8495 static const struct file_operations tracing_stats_fops = {
8496 .open = tracing_open_generic_tr,
8497 .read = tracing_stats_read,
8498 .llseek = generic_file_llseek,
8499 .release = tracing_release_generic_tr,
8500 };
8501
8502 #ifdef CONFIG_DYNAMIC_FTRACE
8503
8504 static ssize_t
8505 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8506 size_t cnt, loff_t *ppos)
8507 {
8508 ssize_t ret;
8509 char *buf;
8510 int r;
8511
8512 /* 256 should be plenty to hold the amount needed */
8513 buf = kmalloc(256, GFP_KERNEL);
8514 if (!buf)
8515 return -ENOMEM;
8516
8517 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8518 ftrace_update_tot_cnt,
8519 ftrace_number_of_pages,
8520 ftrace_number_of_groups);
8521
8522 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8523 kfree(buf);
8524 return ret;
8525 }
8526
8527 static const struct file_operations tracing_dyn_info_fops = {
8528 .open = tracing_open_generic,
8529 .read = tracing_read_dyn_info,
8530 .llseek = generic_file_llseek,
8531 };
8532 #endif /* CONFIG_DYNAMIC_FTRACE */
8533
8534 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8535 static void
8536 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8537 struct trace_array *tr, struct ftrace_probe_ops *ops,
8538 void *data)
8539 {
8540 tracing_snapshot_instance(tr);
8541 }
8542
8543 static void
8544 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8545 struct trace_array *tr, struct ftrace_probe_ops *ops,
8546 void *data)
8547 {
8548 struct ftrace_func_mapper *mapper = data;
8549 long *count = NULL;
8550
8551 if (mapper)
8552 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8553
8554 if (count) {
8555
8556 if (*count <= 0)
8557 return;
8558
8559 (*count)--;
8560 }
8561
8562 tracing_snapshot_instance(tr);
8563 }
8564
8565 static int
8566 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8567 struct ftrace_probe_ops *ops, void *data)
8568 {
8569 struct ftrace_func_mapper *mapper = data;
8570 long *count = NULL;
8571
8572 seq_printf(m, "%ps:", (void *)ip);
8573
8574 seq_puts(m, "snapshot");
8575
8576 if (mapper)
8577 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8578
8579 if (count)
8580 seq_printf(m, ":count=%ld\n", *count);
8581 else
8582 seq_puts(m, ":unlimited\n");
8583
8584 return 0;
8585 }
8586
8587 static int
8588 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8589 unsigned long ip, void *init_data, void **data)
8590 {
8591 struct ftrace_func_mapper *mapper = *data;
8592
8593 if (!mapper) {
8594 mapper = allocate_ftrace_func_mapper();
8595 if (!mapper)
8596 return -ENOMEM;
8597 *data = mapper;
8598 }
8599
8600 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8601 }
8602
8603 static void
8604 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8605 unsigned long ip, void *data)
8606 {
8607 struct ftrace_func_mapper *mapper = data;
8608
8609 if (!ip) {
8610 if (!mapper)
8611 return;
8612 free_ftrace_func_mapper(mapper, NULL);
8613 return;
8614 }
8615
8616 ftrace_func_mapper_remove_ip(mapper, ip);
8617 }
8618
8619 static struct ftrace_probe_ops snapshot_probe_ops = {
8620 .func = ftrace_snapshot,
8621 .print = ftrace_snapshot_print,
8622 };
8623
8624 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8625 .func = ftrace_count_snapshot,
8626 .print = ftrace_snapshot_print,
8627 .init = ftrace_snapshot_init,
8628 .free = ftrace_snapshot_free,
8629 };
8630
8631 static int
8632 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8633 char *glob, char *cmd, char *param, int enable)
8634 {
8635 struct ftrace_probe_ops *ops;
8636 void *count = (void *)-1;
8637 char *number;
8638 int ret;
8639
8640 if (!tr)
8641 return -ENODEV;
8642
8643 /* hash funcs only work with set_ftrace_filter */
8644 if (!enable)
8645 return -EINVAL;
8646
8647 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8648
8649 if (glob[0] == '!')
8650 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8651
8652 if (!param)
8653 goto out_reg;
8654
8655 number = strsep(&param, ":");
8656
8657 if (!strlen(number))
8658 goto out_reg;
8659
8660 /*
8661 * We use the callback data field (which is a pointer)
8662 * as our counter.
8663 */
8664 ret = kstrtoul(number, 0, (unsigned long *)&count);
8665 if (ret)
8666 return ret;
8667
8668 out_reg:
8669 ret = tracing_alloc_snapshot_instance(tr);
8670 if (ret < 0)
8671 goto out;
8672
8673 ret = register_ftrace_function_probe(glob, tr, ops, count);
8674
8675 out:
8676 return ret < 0 ? ret : 0;
8677 }
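/*
 * The command this callback backs is driven through set_ftrace_filter.
 * Illustrative invocations, with an optional trigger count:
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter
 *	echo 'schedule:snapshot:3' > set_ftrace_filter	// at most 3 snapshots
 *	echo '!schedule:snapshot' > set_ftrace_filter	// remove the probe
 *
 * When a count is given it is parsed above and carried in the probe's
 * callback data, then decremented by ftrace_count_snapshot().
 */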
8678
8679 static struct ftrace_func_command ftrace_snapshot_cmd = {
8680 .name = "snapshot",
8681 .func = ftrace_trace_snapshot_callback,
8682 };
8683
8684 static __init int register_snapshot_cmd(void)
8685 {
8686 return register_ftrace_command(&ftrace_snapshot_cmd);
8687 }
8688 #else
8689 static inline __init int register_snapshot_cmd(void) { return 0; }
8690 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8691
8692 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8693 {
8694 if (WARN_ON(!tr->dir))
8695 return ERR_PTR(-ENODEV);
8696
8697 /* Top directory uses NULL as the parent */
8698 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8699 return NULL;
8700
8701 /* All sub buffers have a descriptor */
8702 return tr->dir;
8703 }
8704
8705 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8706 {
8707 struct dentry *d_tracer;
8708
8709 if (tr->percpu_dir)
8710 return tr->percpu_dir;
8711
8712 d_tracer = tracing_get_dentry(tr);
8713 if (IS_ERR(d_tracer))
8714 return NULL;
8715
8716 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8717
8718 MEM_FAIL(!tr->percpu_dir,
8719 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8720
8721 return tr->percpu_dir;
8722 }
8723
8724 static struct dentry *
8725 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8726 void *data, long cpu, const struct file_operations *fops)
8727 {
8728 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8729
8730 if (ret) /* See tracing_get_cpu() */
8731 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8732 return ret;
8733 }
8734
8735 static void
8736 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8737 {
8738 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8739 struct dentry *d_cpu;
8740 char cpu_dir[30]; /* 30 characters should be more than enough */
8741
8742 if (!d_percpu)
8743 return;
8744
8745 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8746 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8747 if (!d_cpu) {
8748 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8749 return;
8750 }
8751
8752 /* per cpu trace_pipe */
8753 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8754 tr, cpu, &tracing_pipe_fops);
8755
8756 /* per cpu trace */
8757 trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8758 tr, cpu, &tracing_fops);
8759
8760 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8761 tr, cpu, &tracing_buffers_fops);
8762
8763 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8764 tr, cpu, &tracing_stats_fops);
8765
8766 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8767 tr, cpu, &tracing_entries_fops);
8768
8769 #ifdef CONFIG_TRACER_SNAPSHOT
8770 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8771 tr, cpu, &snapshot_fops);
8772
8773 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8774 tr, cpu, &snapshot_raw_fops);
8775 #endif
8776 }
8777
8778 #ifdef CONFIG_FTRACE_SELFTEST
8779 /* Let selftest have access to static functions in this file */
8780 #include "trace_selftest.c"
8781 #endif
8782
8783 static ssize_t
8784 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8785 loff_t *ppos)
8786 {
8787 struct trace_option_dentry *topt = filp->private_data;
8788 char *buf;
8789
8790 if (topt->flags->val & topt->opt->bit)
8791 buf = "1\n";
8792 else
8793 buf = "0\n";
8794
8795 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8796 }
8797
8798 static ssize_t
8799 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8800 loff_t *ppos)
8801 {
8802 struct trace_option_dentry *topt = filp->private_data;
8803 unsigned long val;
8804 int ret;
8805
8806 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8807 if (ret)
8808 return ret;
8809
8810 if (val != 0 && val != 1)
8811 return -EINVAL;
8812
8813 if (!!(topt->flags->val & topt->opt->bit) != val) {
8814 mutex_lock(&trace_types_lock);
8815 ret = __set_tracer_option(topt->tr, topt->flags,
8816 topt->opt, !val);
8817 mutex_unlock(&trace_types_lock);
8818 if (ret)
8819 return ret;
8820 }
8821
8822 *ppos += cnt;
8823
8824 return cnt;
8825 }
8826
8827 static int tracing_open_options(struct inode *inode, struct file *filp)
8828 {
8829 struct trace_option_dentry *topt = inode->i_private;
8830 int ret;
8831
8832 ret = tracing_check_open_get_tr(topt->tr);
8833 if (ret)
8834 return ret;
8835
8836 filp->private_data = inode->i_private;
8837 return 0;
8838 }
8839
8840 static int tracing_release_options(struct inode *inode, struct file *file)
8841 {
8842 struct trace_option_dentry *topt = file->private_data;
8843
8844 trace_array_put(topt->tr);
8845 return 0;
8846 }
8847
8848 static const struct file_operations trace_options_fops = {
8849 .open = tracing_open_options,
8850 .read = trace_options_read,
8851 .write = trace_options_write,
8852 .llseek = generic_file_llseek,
8853 .release = tracing_release_options,
8854 };
8855
8856 /*
8857 * In order to pass in both the trace_array descriptor as well as the index
8858 * to the flag that the trace option file represents, the trace_array
8859 * has a character array of trace_flags_index[], which holds the index
8860 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8861 * The address of this character array is passed to the flag option file
8862 * read/write callbacks.
8863 *
8864 * In order to extract both the index and the trace_array descriptor,
8865 * get_tr_index() uses the following algorithm.
8866 *
8867 * idx = *ptr;
8868 *
8869 * This works because the pointer holds the address of an entry in the
8870 * index array, and each entry stores its own position (index[1] == 1).
8871 *
8872 * Then to get the trace_array descriptor, by subtracting that index
8873 * from the ptr, we get to the start of the index itself.
8874 *
8875 * ptr - idx == &index[0]
8876 *
8877 * Then a simple container_of() from that pointer gets us to the
8878 * trace_array descriptor.
8879 */
8880 static void get_tr_index(void *data, struct trace_array **ptr,
8881 unsigned int *pindex)
8882 {
8883 *pindex = *(unsigned char *)data;
8884
8885 *ptr = container_of(data - *pindex, struct trace_array,
8886 trace_flags_index);
8887 }
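/*
 * Worked example of the arithmetic described above, with made-up values:
 * if data == &tr->trace_flags_index[3], then *pindex becomes 3 and
 * data - 3 == &tr->trace_flags_index[0], so container_of() recovers tr.
 * This only holds because trace_flags_index[i] is initialized to i
 * (see init_trace_flags_index()).
 */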
8888
8889 static ssize_t
8890 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8891 loff_t *ppos)
8892 {
8893 void *tr_index = filp->private_data;
8894 struct trace_array *tr;
8895 unsigned int index;
8896 char *buf;
8897
8898 get_tr_index(tr_index, &tr, &index);
8899
8900 if (tr->trace_flags & (1 << index))
8901 buf = "1\n";
8902 else
8903 buf = "0\n";
8904
8905 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8906 }
8907
8908 static ssize_t
8909 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8910 loff_t *ppos)
8911 {
8912 void *tr_index = filp->private_data;
8913 struct trace_array *tr;
8914 unsigned int index;
8915 unsigned long val;
8916 int ret;
8917
8918 get_tr_index(tr_index, &tr, &index);
8919
8920 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8921 if (ret)
8922 return ret;
8923
8924 if (val != 0 && val != 1)
8925 return -EINVAL;
8926
8927 mutex_lock(&event_mutex);
8928 mutex_lock(&trace_types_lock);
8929 ret = set_tracer_flag(tr, 1 << index, val);
8930 mutex_unlock(&trace_types_lock);
8931 mutex_unlock(&event_mutex);
8932
8933 if (ret < 0)
8934 return ret;
8935
8936 *ppos += cnt;
8937
8938 return cnt;
8939 }
8940
8941 static const struct file_operations trace_options_core_fops = {
8942 .open = tracing_open_generic,
8943 .read = trace_options_core_read,
8944 .write = trace_options_core_write,
8945 .llseek = generic_file_llseek,
8946 };
8947
8948 struct dentry *trace_create_file(const char *name,
8949 umode_t mode,
8950 struct dentry *parent,
8951 void *data,
8952 const struct file_operations *fops)
8953 {
8954 struct dentry *ret;
8955
8956 ret = tracefs_create_file(name, mode, parent, data, fops);
8957 if (!ret)
8958 pr_warn("Could not create tracefs '%s' entry\n", name);
8959
8960 return ret;
8961 }
8962
8963
8964 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8965 {
8966 struct dentry *d_tracer;
8967
8968 if (tr->options)
8969 return tr->options;
8970
8971 d_tracer = tracing_get_dentry(tr);
8972 if (IS_ERR(d_tracer))
8973 return NULL;
8974
8975 tr->options = tracefs_create_dir("options", d_tracer);
8976 if (!tr->options) {
8977 pr_warn("Could not create tracefs directory 'options'\n");
8978 return NULL;
8979 }
8980
8981 return tr->options;
8982 }
8983
8984 static void
8985 create_trace_option_file(struct trace_array *tr,
8986 struct trace_option_dentry *topt,
8987 struct tracer_flags *flags,
8988 struct tracer_opt *opt)
8989 {
8990 struct dentry *t_options;
8991
8992 t_options = trace_options_init_dentry(tr);
8993 if (!t_options)
8994 return;
8995
8996 topt->flags = flags;
8997 topt->opt = opt;
8998 topt->tr = tr;
8999
9000 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9001 t_options, topt, &trace_options_fops);
9002
9003 }
9004
9005 static void
9006 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9007 {
9008 struct trace_option_dentry *topts;
9009 struct trace_options *tr_topts;
9010 struct tracer_flags *flags;
9011 struct tracer_opt *opts;
9012 int cnt;
9013 int i;
9014
9015 if (!tracer)
9016 return;
9017
9018 flags = tracer->flags;
9019
9020 if (!flags || !flags->opts)
9021 return;
9022
9023 /*
9024 * If this is an instance, only create flags for tracers
9025 * the instance may have.
9026 */
9027 if (!trace_ok_for_array(tracer, tr))
9028 return;
9029
9030 for (i = 0; i < tr->nr_topts; i++) {
9031 /* Make sure there's no duplicate flags. */
9032 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9033 return;
9034 }
9035
9036 opts = flags->opts;
9037
9038 for (cnt = 0; opts[cnt].name; cnt++)
9039 ;
9040
9041 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9042 if (!topts)
9043 return;
9044
9045 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9046 GFP_KERNEL);
9047 if (!tr_topts) {
9048 kfree(topts);
9049 return;
9050 }
9051
9052 tr->topts = tr_topts;
9053 tr->topts[tr->nr_topts].tracer = tracer;
9054 tr->topts[tr->nr_topts].topts = topts;
9055 tr->nr_topts++;
9056
9057 for (cnt = 0; opts[cnt].name; cnt++) {
9058 create_trace_option_file(tr, &topts[cnt], flags,
9059 &opts[cnt]);
9060 MEM_FAIL(topts[cnt].entry == NULL,
9061 "Failed to create trace option: %s",
9062 opts[cnt].name);
9063 }
9064 }
9065
9066 static struct dentry *
9067 create_trace_option_core_file(struct trace_array *tr,
9068 const char *option, long index)
9069 {
9070 struct dentry *t_options;
9071
9072 t_options = trace_options_init_dentry(tr);
9073 if (!t_options)
9074 return NULL;
9075
9076 return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9077 (void *)&tr->trace_flags_index[index],
9078 &trace_options_core_fops);
9079 }
9080
9081 static void create_trace_options_dir(struct trace_array *tr)
9082 {
9083 struct dentry *t_options;
9084 bool top_level = tr == &global_trace;
9085 int i;
9086
9087 t_options = trace_options_init_dentry(tr);
9088 if (!t_options)
9089 return;
9090
9091 for (i = 0; trace_options[i]; i++) {
9092 if (top_level ||
9093 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9094 create_trace_option_core_file(tr, trace_options[i], i);
9095 }
9096 }
9097
9098 static ssize_t
9099 rb_simple_read(struct file *filp, char __user *ubuf,
9100 size_t cnt, loff_t *ppos)
9101 {
9102 struct trace_array *tr = filp->private_data;
9103 char buf[64];
9104 int r;
9105
9106 r = tracer_tracing_is_on(tr);
9107 r = sprintf(buf, "%d\n", r);
9108
9109 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9110 }
9111
9112 static ssize_t
9113 rb_simple_write(struct file *filp, const char __user *ubuf,
9114 size_t cnt, loff_t *ppos)
9115 {
9116 struct trace_array *tr = filp->private_data;
9117 struct trace_buffer *buffer = tr->array_buffer.buffer;
9118 unsigned long val;
9119 int ret;
9120
9121 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9122 if (ret)
9123 return ret;
9124
9125 if (buffer) {
9126 mutex_lock(&trace_types_lock);
9127 if (!!val == tracer_tracing_is_on(tr)) {
9128 val = 0; /* do nothing */
9129 } else if (val) {
9130 tracer_tracing_on(tr);
9131 if (tr->current_trace->start)
9132 tr->current_trace->start(tr);
9133 } else {
9134 tracer_tracing_off(tr);
9135 if (tr->current_trace->stop)
9136 tr->current_trace->stop(tr);
9137 /* Wake up any waiters */
9138 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9139 }
9140 mutex_unlock(&trace_types_lock);
9141 }
9142
9143 (*ppos)++;
9144
9145 return cnt;
9146 }
9147
9148 static const struct file_operations rb_simple_fops = {
9149 .open = tracing_open_generic_tr,
9150 .read = rb_simple_read,
9151 .write = rb_simple_write,
9152 .release = tracing_release_generic_tr,
9153 .llseek = default_llseek,
9154 };
9155
9156 static ssize_t
9157 buffer_percent_read(struct file *filp, char __user *ubuf,
9158 size_t cnt, loff_t *ppos)
9159 {
9160 struct trace_array *tr = filp->private_data;
9161 char buf[64];
9162 int r;
9163
9164 r = tr->buffer_percent;
9165 r = sprintf(buf, "%d\n", r);
9166
9167 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9168 }
9169
9170 static ssize_t
9171 buffer_percent_write(struct file *filp, const char __user *ubuf,
9172 size_t cnt, loff_t *ppos)
9173 {
9174 struct trace_array *tr = filp->private_data;
9175 unsigned long val;
9176 int ret;
9177
9178 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9179 if (ret)
9180 return ret;
9181
9182 if (val > 100)
9183 return -EINVAL;
9184
9185 tr->buffer_percent = val;
9186
9187 (*ppos)++;
9188
9189 return cnt;
9190 }
9191
9192 static const struct file_operations buffer_percent_fops = {
9193 .open = tracing_open_generic_tr,
9194 .read = buffer_percent_read,
9195 .write = buffer_percent_write,
9196 .release = tracing_release_generic_tr,
9197 .llseek = default_llseek,
9198 };
9199
9200 static struct dentry *trace_instance_dir;
9201
9202 static void
9203 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9204
9205 static int
9206 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9207 {
9208 enum ring_buffer_flags rb_flags;
9209
9210 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9211
9212 buf->tr = tr;
9213
9214 buf->buffer = ring_buffer_alloc(size, rb_flags);
9215 if (!buf->buffer)
9216 return -ENOMEM;
9217
9218 buf->data = alloc_percpu(struct trace_array_cpu);
9219 if (!buf->data) {
9220 ring_buffer_free(buf->buffer);
9221 buf->buffer = NULL;
9222 return -ENOMEM;
9223 }
9224
9225 /* Allocate the first page for all buffers */
9226 set_buffer_entries(&tr->array_buffer,
9227 ring_buffer_size(tr->array_buffer.buffer, 0));
9228
9229 return 0;
9230 }
9231
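/*
 * Allocate the buffers for a trace array: the main array_buffer always,
 * and (with CONFIG_TRACER_MAX_TRACE) the max_buffer used for snapshots and
 * latency tracing. Unless a snapshot was requested on the kernel command
 * line, the max_buffer starts out at the minimum size and is only resized
 * when a snapshot is actually enabled.
 */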
9232 static int allocate_trace_buffers(struct trace_array *tr, int size)
9233 {
9234 int ret;
9235
9236 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9237 if (ret)
9238 return ret;
9239
9240 #ifdef CONFIG_TRACER_MAX_TRACE
9241 ret = allocate_trace_buffer(tr, &tr->max_buffer,
9242 allocate_snapshot ? size : 1);
9243 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9244 ring_buffer_free(tr->array_buffer.buffer);
9245 tr->array_buffer.buffer = NULL;
9246 free_percpu(tr->array_buffer.data);
9247 tr->array_buffer.data = NULL;
9248 return -ENOMEM;
9249 }
9250 tr->allocated_snapshot = allocate_snapshot;
9251
9252 /*
9253 * Only the top level trace array gets its snapshot allocated
9254 * from the kernel command line.
9255 */
9256 allocate_snapshot = false;
9257 #endif
9258
9259 return 0;
9260 }
9261
9262 static void free_trace_buffer(struct array_buffer *buf)
9263 {
9264 if (buf->buffer) {
9265 ring_buffer_free(buf->buffer);
9266 buf->buffer = NULL;
9267 free_percpu(buf->data);
9268 buf->data = NULL;
9269 }
9270 }
9271
9272 static void free_trace_buffers(struct trace_array *tr)
9273 {
9274 if (!tr)
9275 return;
9276
9277 free_trace_buffer(&tr->array_buffer);
9278
9279 #ifdef CONFIG_TRACER_MAX_TRACE
9280 free_trace_buffer(&tr->max_buffer);
9281 #endif
9282 }
9283
9284 static void init_trace_flags_index(struct trace_array *tr)
9285 {
9286 int i;
9287
9288 /* Used by the trace options files */
9289 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9290 tr->trace_flags_index[i] = i;
9291 }
9292
9293 static void __update_tracer_options(struct trace_array *tr)
9294 {
9295 struct tracer *t;
9296
9297 for (t = trace_types; t; t = t->next)
9298 add_tracer_options(tr, t);
9299 }
9300
9301 static void update_tracer_options(struct trace_array *tr)
9302 {
9303 mutex_lock(&trace_types_lock);
9304 tracer_options_updated = true;
9305 __update_tracer_options(tr);
9306 mutex_unlock(&trace_types_lock);
9307 }
9308
9309 /* Must have trace_types_lock held */
9310 struct trace_array *trace_array_find(const char *instance)
9311 {
9312 struct trace_array *tr, *found = NULL;
9313
9314 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9315 if (tr->name && strcmp(tr->name, instance) == 0) {
9316 found = tr;
9317 break;
9318 }
9319 }
9320
9321 return found;
9322 }
9323
9324 struct trace_array *trace_array_find_get(const char *instance)
9325 {
9326 struct trace_array *tr;
9327
9328 mutex_lock(&trace_types_lock);
9329 tr = trace_array_find(instance);
9330 if (tr)
9331 tr->ref++;
9332 mutex_unlock(&trace_types_lock);
9333
9334 return tr;
9335 }
9336
9337 static int trace_array_create_dir(struct trace_array *tr)
9338 {
9339 int ret;
9340
9341 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9342 if (!tr->dir)
9343 return -EINVAL;
9344
9345 ret = event_trace_add_tracer(tr->dir, tr);
9346 if (ret) {
9347 tracefs_remove(tr->dir);
9348 return ret;
9349 }
9350
9351 init_tracer_tracefs(tr, tr->dir);
9352 __update_tracer_options(tr);
9353
9354 return ret;
9355 }
9356
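/*
 * trace_array_create() allocates and initializes a new trace instance:
 * name, tracing cpumask, trace flags inherited from the top level array,
 * ring buffers and ftrace ops. On success the instance is added to
 * ftrace_trace_arrays with a reference count of 1. Callers hold
 * trace_types_lock (and event_mutex for the tracefs directory setup).
 */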
9357 static struct trace_array *trace_array_create(const char *name)
9358 {
9359 struct trace_array *tr;
9360 int ret;
9361
9362 ret = -ENOMEM;
9363 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9364 if (!tr)
9365 return ERR_PTR(ret);
9366
9367 tr->name = kstrdup(name, GFP_KERNEL);
9368 if (!tr->name)
9369 goto out_free_tr;
9370
9371 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9372 goto out_free_tr;
9373
9374 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9375
9376 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9377
9378 raw_spin_lock_init(&tr->start_lock);
9379
9380 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9381
9382 tr->current_trace = &nop_trace;
9383
9384 INIT_LIST_HEAD(&tr->systems);
9385 INIT_LIST_HEAD(&tr->events);
9386 INIT_LIST_HEAD(&tr->hist_vars);
9387 INIT_LIST_HEAD(&tr->err_log);
9388
9389 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9390 goto out_free_tr;
9391
9392 if (ftrace_allocate_ftrace_ops(tr) < 0)
9393 goto out_free_tr;
9394
9395 ftrace_init_trace_array(tr);
9396
9397 init_trace_flags_index(tr);
9398
9399 if (trace_instance_dir) {
9400 ret = trace_array_create_dir(tr);
9401 if (ret)
9402 goto out_free_tr;
9403 } else
9404 __trace_early_add_events(tr);
9405
9406 list_add(&tr->list, &ftrace_trace_arrays);
9407
9408 tr->ref++;
9409
9410 return tr;
9411
9412 out_free_tr:
9413 ftrace_free_ftrace_ops(tr);
9414 free_trace_buffers(tr);
9415 free_cpumask_var(tr->tracing_cpumask);
9416 kfree(tr->name);
9417 kfree(tr);
9418
9419 return ERR_PTR(ret);
9420 }
9421
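/*
 * instance_mkdir() is the tracefs callback invoked when user space creates
 * a directory under the "instances" directory (registered in
 * create_trace_instances() below), e.g.:
 *
 *   mkdir /sys/kernel/tracing/instances/foo
 *
 * It creates a new trace_array with that name unless one already exists.
 */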
9422 static int instance_mkdir(const char *name)
9423 {
9424 struct trace_array *tr;
9425 int ret;
9426
9427 mutex_lock(&event_mutex);
9428 mutex_lock(&trace_types_lock);
9429
9430 ret = -EEXIST;
9431 if (trace_array_find(name))
9432 goto out_unlock;
9433
9434 tr = trace_array_create(name);
9435
9436 ret = PTR_ERR_OR_ZERO(tr);
9437
9438 out_unlock:
9439 mutex_unlock(&trace_types_lock);
9440 mutex_unlock(&event_mutex);
9441 return ret;
9442 }
9443
9444 /**
9445 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9446 * @name: The name of the trace array to be looked up/created.
9447 *
9448 * Returns a pointer to the trace array with the given name, or NULL if it
9449 * cannot be created.
9450 *
9451 * NOTE: This function increments the reference counter associated with the
9452 * trace array returned. This makes sure it cannot be freed while in use.
9453 * Use trace_array_put() once the trace array is no longer needed.
9454 * If the trace_array is to be freed, trace_array_destroy() needs to
9455 * be called after the trace_array_put(), or simply let user space delete
9456 * it from the tracefs instances directory. But until the
9457 * trace_array_put() is called, user space cannot delete it.
9458 *
9459 */
9460 struct trace_array *trace_array_get_by_name(const char *name)
9461 {
9462 struct trace_array *tr;
9463
9464 mutex_lock(&event_mutex);
9465 mutex_lock(&trace_types_lock);
9466
9467 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9468 if (tr->name && strcmp(tr->name, name) == 0)
9469 goto out_unlock;
9470 }
9471
9472 tr = trace_array_create(name);
9473
9474 if (IS_ERR(tr))
9475 tr = NULL;
9476 out_unlock:
9477 if (tr)
9478 tr->ref++;
9479
9480 mutex_unlock(&trace_types_lock);
9481 mutex_unlock(&event_mutex);
9482 return tr;
9483 }
9484 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9485
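/*
 * Tear down a trace instance. Fails with -EBUSY while the instance still
 * has extra references or active trace users. Must be called with both
 * event_mutex and trace_types_lock held.
 */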
9486 static int __remove_instance(struct trace_array *tr)
9487 {
9488 int i;
9489
9490 /* Reference counter for a newly created trace array = 1. */
9491 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9492 return -EBUSY;
9493
9494 list_del(&tr->list);
9495
9496 /* Disable all the flags that were enabled coming in */
9497 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9498 if ((1 << i) & ZEROED_TRACE_FLAGS)
9499 set_tracer_flag(tr, 1 << i, 0);
9500 }
9501
9502 tracing_set_nop(tr);
9503 clear_ftrace_function_probes(tr);
9504 event_trace_del_tracer(tr);
9505 ftrace_clear_pids(tr);
9506 ftrace_destroy_function_files(tr);
9507 tracefs_remove(tr->dir);
9508 free_percpu(tr->last_func_repeats);
9509 free_trace_buffers(tr);
9510 clear_tracing_err_log(tr);
9511
9512 for (i = 0; i < tr->nr_topts; i++) {
9513 kfree(tr->topts[i].topts);
9514 }
9515 kfree(tr->topts);
9516
9517 free_cpumask_var(tr->tracing_cpumask);
9518 kfree(tr->name);
9519 kfree(tr);
9520
9521 return 0;
9522 }
9523
9524 int trace_array_destroy(struct trace_array *this_tr)
9525 {
9526 struct trace_array *tr;
9527 int ret;
9528
9529 if (!this_tr)
9530 return -EINVAL;
9531
9532 mutex_lock(&event_mutex);
9533 mutex_lock(&trace_types_lock);
9534
9535 ret = -ENODEV;
9536
9537 /* Making sure trace array exists before destroying it. */
9538 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9539 if (tr == this_tr) {
9540 ret = __remove_instance(tr);
9541 break;
9542 }
9543 }
9544
9545 mutex_unlock(&trace_types_lock);
9546 mutex_unlock(&event_mutex);
9547
9548 return ret;
9549 }
9550 EXPORT_SYMBOL_GPL(trace_array_destroy);
9551
9552 static int instance_rmdir(const char *name)
9553 {
9554 struct trace_array *tr;
9555 int ret;
9556
9557 mutex_lock(&event_mutex);
9558 mutex_lock(&trace_types_lock);
9559
9560 ret = -ENODEV;
9561 tr = trace_array_find(name);
9562 if (tr)
9563 ret = __remove_instance(tr);
9564
9565 mutex_unlock(&trace_types_lock);
9566 mutex_unlock(&event_mutex);
9567
9568 return ret;
9569 }
9570
9571 static __init void create_trace_instances(struct dentry *d_tracer)
9572 {
9573 struct trace_array *tr;
9574
9575 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9576 instance_mkdir,
9577 instance_rmdir);
9578 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9579 return;
9580
9581 mutex_lock(&event_mutex);
9582 mutex_lock(&trace_types_lock);
9583
9584 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9585 if (!tr->name)
9586 continue;
9587 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9588 "Failed to create instance directory\n"))
9589 break;
9590 }
9591
9592 mutex_unlock(&trace_types_lock);
9593 mutex_unlock(&event_mutex);
9594 }
9595
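/*
 * Populate a tracefs directory with the standard control files of a trace
 * array (trace, trace_pipe, tracing_on, buffer_size_kb, ...). This is used
 * both for the top level tracing directory and for each instance created
 * under "instances".
 */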
9596 static void
9597 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9598 {
9599 struct trace_event_file *file;
9600 int cpu;
9601
9602 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9603 tr, &show_traces_fops);
9604
9605 trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9606 tr, &set_tracer_fops);
9607
9608 trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9609 tr, &tracing_cpumask_fops);
9610
9611 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9612 tr, &tracing_iter_fops);
9613
9614 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9615 tr, &tracing_fops);
9616
9617 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9618 tr, &tracing_pipe_fops);
9619
9620 trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9621 tr, &tracing_entries_fops);
9622
9623 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9624 tr, &tracing_total_entries_fops);
9625
9626 trace_create_file("free_buffer", 0200, d_tracer,
9627 tr, &tracing_free_buffer_fops);
9628
9629 trace_create_file("trace_marker", 0220, d_tracer,
9630 tr, &tracing_mark_fops);
9631
9632 file = __find_event_file(tr, "ftrace", "print");
9633 if (file && file->dir)
9634 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9635 file, &event_trigger_fops);
9636 tr->trace_marker_file = file;
9637
9638 trace_create_file("trace_marker_raw", 0220, d_tracer,
9639 tr, &tracing_mark_raw_fops);
9640
9641 trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9642 &trace_clock_fops);
9643
9644 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9645 tr, &rb_simple_fops);
9646
9647 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9648 &trace_time_stamp_mode_fops);
9649
9650 tr->buffer_percent = 50;
9651
9652 trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9653 tr, &buffer_percent_fops);
9654
9655 create_trace_options_dir(tr);
9656
9657 #ifdef CONFIG_TRACER_MAX_TRACE
9658 trace_create_maxlat_file(tr, d_tracer);
9659 #endif
9660
9661 if (ftrace_create_function_files(tr, d_tracer))
9662 MEM_FAIL(1, "Could not allocate function filter files");
9663
9664 #ifdef CONFIG_TRACER_SNAPSHOT
9665 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9666 tr, &snapshot_fops);
9667 #endif
9668
9669 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9670 tr, &tracing_err_log_fops);
9671
9672 for_each_tracing_cpu(cpu)
9673 tracing_init_tracefs_percpu(tr, cpu);
9674
9675 ftrace_init_tracefs(tr, d_tracer);
9676 }
9677
9678 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9679 {
9680 struct vfsmount *mnt;
9681 struct file_system_type *type;
9682
9683 /*
9684 * To maintain backward compatibility for tools that mount
9685 * debugfs to get to the tracing facility, tracefs is automatically
9686 * mounted to the debugfs/tracing directory.
9687 */
9688 type = get_fs_type("tracefs");
9689 if (!type)
9690 return NULL;
9691 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9692 put_filesystem(type);
9693 if (IS_ERR(mnt))
9694 return NULL;
9695 mntget(mnt);
9696
9697 return mnt;
9698 }
9699
9700 /**
9701 * tracing_init_dentry - initialize top level trace array
9702 *
9703 * This is called when creating files or directories in the tracing
9704 * directory. It is called via fs_initcall() by any of the boot up code,
9705 * and returns 0 on success or a negative error code on failure.
9706 */
9707 int tracing_init_dentry(void)
9708 {
9709 struct trace_array *tr = &global_trace;
9710
9711 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9712 pr_warn("Tracing disabled due to lockdown\n");
9713 return -EPERM;
9714 }
9715
9716 /* The top level trace array uses NULL as parent */
9717 if (tr->dir)
9718 return 0;
9719
9720 if (WARN_ON(!tracefs_initialized()))
9721 return -ENODEV;
9722
9723 /*
9724 * As there may still be users that expect the tracing
9725 * files to exist in debugfs/tracing, we must automount
9726 * the tracefs file system there, so older tools still
9727 * work with the newer kernel.
9728 */
9729 tr->dir = debugfs_create_automount("tracing", NULL,
9730 trace_automount, NULL);
9731
9732 return 0;
9733 }
9734
9735 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9736 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9737
9738 static struct workqueue_struct *eval_map_wq __initdata;
9739 static struct work_struct eval_map_work __initdata;
9740
9741 static void __init eval_map_work_func(struct work_struct *work)
9742 {
9743 int len;
9744
9745 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9746 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9747 }
9748
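/*
 * Insert the built-in eval (enum/sizeof) maps into the trace events. The
 * work is deferred to an unbound workqueue; if the workqueue cannot be
 * allocated, it is simply done synchronously here instead. trace_eval_sync()
 * below flushes it at late_initcall_sync time by destroying the workqueue.
 */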
9749 static int __init trace_eval_init(void)
9750 {
9751 INIT_WORK(&eval_map_work, eval_map_work_func);
9752
9753 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9754 if (!eval_map_wq) {
9755 pr_err("Unable to allocate eval_map_wq\n");
9756 /* Do work here */
9757 eval_map_work_func(&eval_map_work);
9758 return -ENOMEM;
9759 }
9760
9761 queue_work(eval_map_wq, &eval_map_work);
9762 return 0;
9763 }
9764
9765 static int __init trace_eval_sync(void)
9766 {
9767 /* Make sure the eval map updates are finished */
9768 if (eval_map_wq)
9769 destroy_workqueue(eval_map_wq);
9770 return 0;
9771 }
9772
9773 late_initcall_sync(trace_eval_sync);
9774
9775
9776 #ifdef CONFIG_MODULES
9777 static void trace_module_add_evals(struct module *mod)
9778 {
9779 if (!mod->num_trace_evals)
9780 return;
9781
9782 /*
9783 * Modules with bad taint do not have events created, do
9784 * not bother with enums either.
9785 */
9786 if (trace_module_has_bad_taint(mod))
9787 return;
9788
9789 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9790 }
9791
9792 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9793 static void trace_module_remove_evals(struct module *mod)
9794 {
9795 union trace_eval_map_item *map;
9796 union trace_eval_map_item **last = &trace_eval_maps;
9797
9798 if (!mod->num_trace_evals)
9799 return;
9800
9801 mutex_lock(&trace_eval_mutex);
9802
9803 map = trace_eval_maps;
9804
9805 while (map) {
9806 if (map->head.mod == mod)
9807 break;
9808 map = trace_eval_jmp_to_tail(map);
9809 last = &map->tail.next;
9810 map = map->tail.next;
9811 }
9812 if (!map)
9813 goto out;
9814
9815 *last = trace_eval_jmp_to_tail(map)->tail.next;
9816 kfree(map);
9817 out:
9818 mutex_unlock(&trace_eval_mutex);
9819 }
9820 #else
9821 static inline void trace_module_remove_evals(struct module *mod) { }
9822 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9823
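/*
 * Module notifier: add a module's eval maps when it is loaded and (when
 * CONFIG_TRACE_EVAL_MAP_FILE is enabled) remove them again when the module
 * is unloaded.
 */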
9824 static int trace_module_notify(struct notifier_block *self,
9825 unsigned long val, void *data)
9826 {
9827 struct module *mod = data;
9828
9829 switch (val) {
9830 case MODULE_STATE_COMING:
9831 trace_module_add_evals(mod);
9832 break;
9833 case MODULE_STATE_GOING:
9834 trace_module_remove_evals(mod);
9835 break;
9836 }
9837
9838 return NOTIFY_OK;
9839 }
9840
9841 static struct notifier_block trace_module_nb = {
9842 .notifier_call = trace_module_notify,
9843 .priority = 0,
9844 };
9845 #endif /* CONFIG_MODULES */
9846
9847 static __init int tracer_init_tracefs(void)
9848 {
9849 int ret;
9850
9851 trace_access_lock_init();
9852
9853 ret = tracing_init_dentry();
9854 if (ret)
9855 return 0;
9856
9857 event_trace_init();
9858
9859 init_tracer_tracefs(&global_trace, NULL);
9860 ftrace_init_tracefs_toplevel(&global_trace, NULL);
9861
9862 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9863 &global_trace, &tracing_thresh_fops);
9864
9865 trace_create_file("README", TRACE_MODE_READ, NULL,
9866 NULL, &tracing_readme_fops);
9867
9868 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9869 NULL, &tracing_saved_cmdlines_fops);
9870
9871 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9872 NULL, &tracing_saved_cmdlines_size_fops);
9873
9874 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9875 NULL, &tracing_saved_tgids_fops);
9876
9877 trace_eval_init();
9878
9879 trace_create_eval_file(NULL);
9880
9881 #ifdef CONFIG_MODULES
9882 register_module_notifier(&trace_module_nb);
9883 #endif
9884
9885 #ifdef CONFIG_DYNAMIC_FTRACE
9886 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9887 NULL, &tracing_dyn_info_fops);
9888 #endif
9889
9890 create_trace_instances(NULL);
9891
9892 update_tracer_options(&global_trace);
9893
9894 return 0;
9895 }
9896
9897 fs_initcall(tracer_init_tracefs);
9898
9899 static int trace_panic_handler(struct notifier_block *this,
9900 unsigned long event, void *unused)
9901 {
9902 bool ftrace_check = false;
9903
9904 trace_android_vh_ftrace_oops_enter(&ftrace_check);
9905
9906 if (ftrace_check)
9907 return NOTIFY_OK;
9908
9909 if (ftrace_dump_on_oops)
9910 ftrace_dump(ftrace_dump_on_oops);
9911
9912 trace_android_vh_ftrace_oops_exit(&ftrace_check);
9913 return NOTIFY_OK;
9914 }
9915
9916 static struct notifier_block trace_panic_notifier = {
9917 .notifier_call = trace_panic_handler,
9918 .next = NULL,
9919 .priority = 150 /* priority: INT_MAX >= x >= 0 */
9920 };
9921
9922 static int trace_die_handler(struct notifier_block *self,
9923 unsigned long val,
9924 void *data)
9925 {
9926 bool ftrace_check = false;
9927
9928 trace_android_vh_ftrace_oops_enter(&ftrace_check);
9929
9930 if (ftrace_check)
9931 return NOTIFY_OK;
9932
9933 switch (val) {
9934 case DIE_OOPS:
9935 if (ftrace_dump_on_oops)
9936 ftrace_dump(ftrace_dump_on_oops);
9937 break;
9938 default:
9939 break;
9940 }
9941
9942 trace_android_vh_ftrace_oops_exit(&ftrace_check);
9943 return NOTIFY_OK;
9944 }
9945
9946 static struct notifier_block trace_die_notifier = {
9947 .notifier_call = trace_die_handler,
9948 .priority = 200
9949 };
9950
9951 /*
9952 * printk is set to a max of 1024, and we really don't need it that big.
9953 * Nothing should be printing 1000 characters anyway.
9954 */
9955 #define TRACE_MAX_PRINT 1000
9956
9957 /*
9958 * Define here KERN_TRACE so that we have one place to modify
9959 * it if we decide to change what log level the ftrace dump
9960 * should be at.
9961 */
9962 #define KERN_TRACE KERN_EMERG
9963
9964 void
9965 trace_printk_seq(struct trace_seq *s)
9966 {
9967 bool dump_printk = true;
9968
9969 /* Probably should print a warning here. */
9970 if (s->seq.len >= TRACE_MAX_PRINT)
9971 s->seq.len = TRACE_MAX_PRINT;
9972
9973 /*
9974 * More paranoid code. Although the buffer size is set to
9975 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9976 * an extra layer of protection.
9977 */
9978 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9979 s->seq.len = s->seq.size - 1;
9980
9981 /* should already be nul-terminated, but we are paranoid. */
9982 s->buffer[s->seq.len] = 0;
9983
9984 trace_android_vh_ftrace_dump_buffer(s, &dump_printk);
9985 if (dump_printk)
9986 printk(KERN_TRACE "%s", s->buffer);
9987
9988 trace_seq_init(s);
9989 }
9990
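/*
 * Set up a trace_iterator over the global trace buffer. The static
 * temp/fmt buffers are used because this runs from oops/panic context,
 * where allocating memory is not an option (see ftrace_dump() below).
 */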
9991 void trace_init_global_iter(struct trace_iterator *iter)
9992 {
9993 iter->tr = &global_trace;
9994 iter->trace = iter->tr->current_trace;
9995 iter->cpu_file = RING_BUFFER_ALL_CPUS;
9996 iter->array_buffer = &global_trace.array_buffer;
9997
9998 if (iter->trace && iter->trace->open)
9999 iter->trace->open(iter);
10000
10001 /* Annotate start of buffers if we had overruns */
10002 if (ring_buffer_overruns(iter->array_buffer->buffer))
10003 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10004
10005 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10006 if (trace_clocks[iter->tr->clock_id].in_ns)
10007 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10008
10009 /* Can not use kmalloc for iter.temp and iter.fmt */
10010 iter->temp = static_temp_buf;
10011 iter->temp_size = STATIC_TEMP_BUF_SIZE;
10012 iter->fmt = static_fmt_buf;
10013 iter->fmt_size = STATIC_FMT_BUF_SIZE;
10014 }
10015
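/*
 * Dump the ring buffer contents to the console. This is reached from the
 * panic and die notifiers above when ftrace_dump_on_oops is set (e.g. via
 * the "ftrace_dump_on_oops" kernel parameter), and from sysrq-z.
 */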
10016 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10017 {
10018 /* use static because iter can be a bit big for the stack */
10019 static struct trace_iterator iter;
10020 static atomic_t dump_running;
10021 struct trace_array *tr = &global_trace;
10022 unsigned int old_userobj;
10023 unsigned long flags;
10024 int cnt = 0, cpu;
10025 bool ftrace_check = false;
10026 unsigned long size;
10027
10028 /* Only allow one dump user at a time. */
10029 if (atomic_inc_return(&dump_running) != 1) {
10030 atomic_dec(&dump_running);
10031 return;
10032 }
10033
10034 /*
10035 * Always turn off tracing when we dump.
10036 * We don't need to show trace output of what happens
10037 * between multiple crashes.
10038 *
10039 * If the user does a sysrq-z, then they can re-enable
10040 * tracing with echo 1 > tracing_on.
10041 */
10042 tracing_off();
10043
10044 local_irq_save(flags);
10045
10046 /* Simulate the iterator */
10047 trace_init_global_iter(&iter);
10048
10049 for_each_tracing_cpu(cpu) {
10050 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10051 size = ring_buffer_size(iter.array_buffer->buffer, cpu);
10052 trace_android_vh_ftrace_size_check(size, &ftrace_check);
10053 }
10054
10055 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10056
10057 /* don't look at user memory in panic mode */
10058 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10059
10060 if (ftrace_check)
10061 goto out_enable;
10062
10063 switch (oops_dump_mode) {
10064 case DUMP_ALL:
10065 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10066 break;
10067 case DUMP_ORIG:
10068 iter.cpu_file = raw_smp_processor_id();
10069 break;
10070 case DUMP_NONE:
10071 goto out_enable;
10072 default:
10073 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10074 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10075 }
10076
10077 printk(KERN_TRACE "Dumping ftrace buffer:\n");
10078
10079 /* Did function tracer already get disabled? */
10080 if (ftrace_is_dead()) {
10081 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10082 printk("# MAY BE MISSING FUNCTION EVENTS\n");
10083 }
10084
10085 /*
10086 * We need to stop all tracing on all CPUs to read
10087 * the next buffer. This is a bit expensive, but is
10088 * not done often. We read everything we can,
10089 * and then release the locks again.
10090 */
10091
10092 while (!trace_empty(&iter)) {
10093 ftrace_check = true;
10094
10095 if (!cnt)
10096 printk(KERN_TRACE "---------------------------------\n");
10097
10098 cnt++;
10099
10100 trace_iterator_reset(&iter);
10101 trace_android_vh_ftrace_format_check(&ftrace_check);
10102 if (ftrace_check)
10103 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10104
10105 if (trace_find_next_entry_inc(&iter) != NULL) {
10106 int ret;
10107
10108 ret = print_trace_line(&iter);
10109 if (ret != TRACE_TYPE_NO_CONSUME)
10110 trace_consume(&iter);
10111 }
10112 touch_nmi_watchdog();
10113
10114 trace_printk_seq(&iter.seq);
10115 }
10116
10117 if (!cnt)
10118 printk(KERN_TRACE " (ftrace buffer empty)\n");
10119 else
10120 printk(KERN_TRACE "---------------------------------\n");
10121
10122 out_enable:
10123 tr->trace_flags |= old_userobj;
10124
10125 for_each_tracing_cpu(cpu) {
10126 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10127 }
10128 atomic_dec(&dump_running);
10129 local_irq_restore(flags);
10130 }
10131 EXPORT_SYMBOL_GPL(ftrace_dump);
10132
10133 #define WRITE_BUFSIZE 4096
10134
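/*
 * Helper for command style tracefs files (e.g. dynamic event files such as
 * kprobe_events): copy the user buffer in WRITE_BUFSIZE chunks, split it
 * into newline separated commands, strip '#' comments, and feed each
 * command to createfn().
 */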
10135 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10136 size_t count, loff_t *ppos,
10137 int (*createfn)(const char *))
10138 {
10139 char *kbuf, *buf, *tmp;
10140 int ret = 0;
10141 size_t done = 0;
10142 size_t size;
10143
10144 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10145 if (!kbuf)
10146 return -ENOMEM;
10147
10148 while (done < count) {
10149 size = count - done;
10150
10151 if (size >= WRITE_BUFSIZE)
10152 size = WRITE_BUFSIZE - 1;
10153
10154 if (copy_from_user(kbuf, buffer + done, size)) {
10155 ret = -EFAULT;
10156 goto out;
10157 }
10158 kbuf[size] = '\0';
10159 buf = kbuf;
10160 do {
10161 tmp = strchr(buf, '\n');
10162 if (tmp) {
10163 *tmp = '\0';
10164 size = tmp - buf + 1;
10165 } else {
10166 size = strlen(buf);
10167 if (done + size < count) {
10168 if (buf != kbuf)
10169 break;
10170 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10171 pr_warn("Line length is too long: Should be less than %d\n",
10172 WRITE_BUFSIZE - 2);
10173 ret = -EINVAL;
10174 goto out;
10175 }
10176 }
10177 done += size;
10178
10179 /* Remove comments */
10180 tmp = strchr(buf, '#');
10181
10182 if (tmp)
10183 *tmp = '\0';
10184
10185 ret = createfn(buf);
10186 if (ret)
10187 goto out;
10188 buf += size;
10189
10190 } while (done < count);
10191 }
10192 ret = done;
10193
10194 out:
10195 kfree(kbuf);
10196
10197 return ret;
10198 }
10199
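/*
 * Allocate and initialize the global trace array and its ring buffers.
 * Called very early in boot from early_trace_init() below.
 */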
10200 __init static int tracer_alloc_buffers(void)
10201 {
10202 int ring_buf_size;
10203 int ret = -ENOMEM;
10204
10205
10206 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10207 pr_warn("Tracing disabled due to lockdown\n");
10208 return -EPERM;
10209 }
10210
10211 /*
10212 * Make sure we don't accidentally add more trace options
10213 * than we have bits for.
10214 */
10215 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10216
10217 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10218 goto out;
10219
10220 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10221 goto out_free_buffer_mask;
10222
10223 /* Only allocate trace_printk buffers if a trace_printk exists */
10224 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10225 /* Must be called before global_trace.buffer is allocated */
10226 trace_printk_init_buffers();
10227
10228 /* To save memory, keep the ring buffer size to its minimum */
10229 if (ring_buffer_expanded)
10230 ring_buf_size = trace_buf_size;
10231 else
10232 ring_buf_size = 1;
10233
10234 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10235 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10236
10237 raw_spin_lock_init(&global_trace.start_lock);
10238
10239 /*
10240 * The prepare callback allocates some memory for the ring buffer. We
10241 * don't free the buffer if the CPU goes down. If we were to free
10242 * the buffer, then the user would lose any trace that was in the
10243 * buffer. The memory will be removed once the "instance" is removed.
10244 */
10245 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10246 "trace/RB:preapre", trace_rb_cpu_prepare,
10247 NULL);
10248 if (ret < 0)
10249 goto out_free_cpumask;
10250 /* Used for event triggers */
10251 ret = -ENOMEM;
10252 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10253 if (!temp_buffer)
10254 goto out_rm_hp_state;
10255
10256 if (trace_create_savedcmd() < 0)
10257 goto out_free_temp_buffer;
10258
10259 /* TODO: make the number of buffers hot pluggable with CPUS */
10260 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10261 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10262 goto out_free_savedcmd;
10263 }
10264
10265 if (global_trace.buffer_disabled)
10266 tracing_off();
10267
10268 if (trace_boot_clock) {
10269 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10270 if (ret < 0)
10271 pr_warn("Trace clock %s not defined, going back to default\n",
10272 trace_boot_clock);
10273 }
10274
10275 /*
10276 * register_tracer() might reference current_trace, so it
10277 * needs to be set before we register anything. This is
10278 * just a bootstrap of current_trace anyway.
10279 */
10280 global_trace.current_trace = &nop_trace;
10281
10282 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10283
10284 ftrace_init_global_array_ops(&global_trace);
10285
10286 init_trace_flags_index(&global_trace);
10287
10288 register_tracer(&nop_trace);
10289
10290 /* Function tracing may start here (via kernel command line) */
10291 init_function_trace();
10292
10293 /* All seems OK, enable tracing */
10294 tracing_disabled = 0;
10295
10296 atomic_notifier_chain_register(&panic_notifier_list,
10297 &trace_panic_notifier);
10298
10299 register_die_notifier(&trace_die_notifier);
10300
10301 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10302
10303 INIT_LIST_HEAD(&global_trace.systems);
10304 INIT_LIST_HEAD(&global_trace.events);
10305 INIT_LIST_HEAD(&global_trace.hist_vars);
10306 INIT_LIST_HEAD(&global_trace.err_log);
10307 list_add(&global_trace.list, &ftrace_trace_arrays);
10308
10309 apply_trace_boot_options();
10310
10311 register_snapshot_cmd();
10312
10313 test_can_verify();
10314
10315 return 0;
10316
10317 out_free_savedcmd:
10318 free_saved_cmdlines_buffer(savedcmd);
10319 out_free_temp_buffer:
10320 ring_buffer_free(temp_buffer);
10321 out_rm_hp_state:
10322 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10323 out_free_cpumask:
10324 free_cpumask_var(global_trace.tracing_cpumask);
10325 out_free_buffer_mask:
10326 free_cpumask_var(tracing_buffer_mask);
10327 out:
10328 return ret;
10329 }
10330
10331 void __init early_trace_init(void)
10332 {
10333 if (tracepoint_printk) {
10334 tracepoint_print_iter =
10335 kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10336 if (MEM_FAIL(!tracepoint_print_iter,
10337 "Failed to allocate trace iterator\n"))
10338 tracepoint_printk = 0;
10339 else
10340 static_key_enable(&tracepoint_printk_key.key);
10341 }
10342 tracer_alloc_buffers();
10343
10344 init_events();
10345 }
10346
10347 void __init trace_init(void)
10348 {
10349 trace_event_init();
10350 }
10351
10352 __init static void clear_boot_tracer(void)
10353 {
10354 /*
10355 * The default bootup tracer name points into an init section.
10356 * This function is called at late_initcall time. If the boot
10357 * tracer was never registered, clear the pointer out, to prevent
10358 * a later registration from accessing the buffer that is
10359 * about to be freed.
10360 */
10361 if (!default_bootup_tracer)
10362 return;
10363
10364 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10365 default_bootup_tracer);
10366 default_bootup_tracer = NULL;
10367 }
10368
10369 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10370 __init static void tracing_set_default_clock(void)
10371 {
10372 /* sched_clock_stable() is determined in late_initcall */
10373 if (!trace_boot_clock && !sched_clock_stable()) {
10374 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10375 pr_warn("Can not set tracing clock due to lockdown\n");
10376 return;
10377 }
10378
10379 printk(KERN_WARNING
10380 "Unstable clock detected, switching default tracing clock to \"global\"\n"
10381 "If you want to keep using the local clock, then add:\n"
10382 " \"trace_clock=local\"\n"
10383 "on the kernel command line\n");
10384 tracing_set_clock(&global_trace, "global");
10385 }
10386 }
10387 #else
10388 static inline void tracing_set_default_clock(void) { }
10389 #endif
10390
10391 __init static int late_trace_init(void)
10392 {
10393 if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10394 static_key_disable(&tracepoint_printk_key.key);
10395 tracepoint_printk = 0;
10396 }
10397
10398 tracing_set_default_clock();
10399 clear_boot_tracer();
10400 return 0;
10401 }
10402
10403 late_initcall_sync(late_trace_init);
10404