1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/kmemleak.h>
44 #include <linux/poll.h>
45 #include <linux/nmi.h>
46 #include <linux/fs.h>
47 #include <linux/trace.h>
48 #include <linux/sched/clock.h>
49 #include <linux/sched/rt.h>
50 #include <linux/fsnotify.h>
51 #include <linux/irq_work.h>
52 #include <linux/workqueue.h>
53 #include <trace/hooks/ftrace_dump.h>
54
55 #include "trace.h"
56 #include "trace_output.h"
57
58 /*
59 * On boot up, the ring buffer is set to the minimum size, so that
60 * we do not waste memory on systems that are not using tracing.
61 */
62 bool ring_buffer_expanded;
63
64 /*
65 * We need to change this state when a selftest is running.
66 * A selftest will look into the ring-buffer to count the
67 * entries inserted during the selftest, although some concurrent
68 * insertions into the ring-buffer, such as trace_printk, could occur
69 * at the same time, giving false positive or negative results.
70 */
71 static bool __read_mostly tracing_selftest_running;
72
73 /*
74 * If boot-time tracing including tracers/events via kernel cmdline
75 * is running, we do not want to run SELFTEST.
76 */
77 bool __read_mostly tracing_selftest_disabled;
78
79 #ifdef CONFIG_FTRACE_STARTUP_TEST
80 void __init disable_tracing_selftest(const char *reason)
81 {
82 if (!tracing_selftest_disabled) {
83 tracing_selftest_disabled = true;
84 pr_info("Ftrace startup test is disabled due to %s\n", reason);
85 }
86 }
87 #endif
88
89 /* Pipe tracepoints to printk */
90 struct trace_iterator *tracepoint_print_iter;
91 int tracepoint_printk;
92 static bool tracepoint_printk_stop_on_boot __initdata;
93 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
94
95 /* For tracers that don't implement custom flags */
96 static struct tracer_opt dummy_tracer_opt[] = {
97 { }
98 };
99
100 static int
101 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
102 {
103 return 0;
104 }
105
106 /*
107 * To prevent the comm cache from being overwritten when no
108 * tracing is active, only save the comm when a trace event
109 * occurred.
110 */
111 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
112
113 /*
114 * Kill all tracing for good (never come back).
115 * It is initialized to 1 but will turn to zero if the initialization
116 * of the tracer is successful. But that is the only place that sets
117 * this back to zero.
118 */
119 static int tracing_disabled = 1;
120
121 cpumask_var_t __read_mostly tracing_buffer_mask;
122
123 /*
124 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
125 *
126 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
127 * is set, then ftrace_dump is called. This will output the contents
128 * of the ftrace buffers to the console. This is very useful for
129 * capturing traces that lead to crashes and outputting them to a
130 * serial console.
131 *
132 * It is off by default, but you can enable it either by specifying
133 * "ftrace_dump_on_oops" on the kernel command line, or by setting
134 * /proc/sys/kernel/ftrace_dump_on_oops
135 * Set 1 if you want to dump buffers of all CPUs
136 * Set 2 if you want to dump the buffer of the CPU that triggered oops
137 */
138
139 enum ftrace_dump_mode ftrace_dump_on_oops;
140
141 /* When set, tracing will stop when a WARN*() is hit */
142 int __disable_trace_on_warning;
143
144 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
145 /* Map of enums to their values, for "eval_map" file */
146 struct trace_eval_map_head {
147 struct module *mod;
148 unsigned long length;
149 };
150
151 union trace_eval_map_item;
152
153 struct trace_eval_map_tail {
154 /*
155 * "end" is first and points to NULL as it must be different
156 * than "mod" or "eval_string"
157 */
158 union trace_eval_map_item *next;
159 const char *end; /* points to NULL */
160 };
161
162 static DEFINE_MUTEX(trace_eval_mutex);
163
164 /*
165 * The trace_eval_maps are saved in an array with two extra elements,
166 * one at the beginning, and one at the end. The beginning item contains
167 * the count of the saved maps (head.length), and the module they
168 * belong to if not built in (head.mod). The ending item contains a
169 * pointer to the next array of saved eval_map items.
170 */
171 union trace_eval_map_item {
172 struct trace_eval_map map;
173 struct trace_eval_map_head head;
174 struct trace_eval_map_tail tail;
175 };
176
177 static union trace_eval_map_item *trace_eval_maps;
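/*
 * Illustrative sketch (not part of this file) of walking the layout
 * described above: skip the head element, print each saved map, then
 * follow the tail to the next saved array. The walker name is
 * hypothetical, and trace_eval_mutex is assumed to be held.
 *
 *	static void my_walk_eval_maps(union trace_eval_map_item *ptr)
 *	{
 *		while (ptr) {
 *			int len = ptr->head.length;
 *			int i;
 *
 *			ptr++;	(step past the head element)
 *			for (i = 0; i < len; i++, ptr++)
 *				pr_info("%s.%s = %lu\n", ptr->map.system,
 *					ptr->map.eval_string,
 *					ptr->map.eval_value);
 *			ptr = ptr->tail.next;	(NULL terminates the walk)
 *		}
 *	}
 */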
178 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
179
180 int tracing_set_tracer(struct trace_array *tr, const char *buf);
181 static void ftrace_trace_userstack(struct trace_array *tr,
182 struct trace_buffer *buffer,
183 unsigned int trace_ctx);
184
185 #define MAX_TRACER_SIZE 100
186 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
187 static char *default_bootup_tracer;
188
189 static bool allocate_snapshot;
190
191 static int __init set_cmdline_ftrace(char *str)
192 {
193 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
194 default_bootup_tracer = bootup_tracer_buf;
195 /* We are using ftrace early, expand it */
196 ring_buffer_expanded = true;
197 return 1;
198 }
199 __setup("ftrace=", set_cmdline_ftrace);
200
201 static int __init set_ftrace_dump_on_oops(char *str)
202 {
203 if (*str++ != '=' || !*str || !strcmp("1", str)) {
204 ftrace_dump_on_oops = DUMP_ALL;
205 return 1;
206 }
207
208 if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
209 ftrace_dump_on_oops = DUMP_ORIG;
210 return 1;
211 }
212
213 return 0;
214 }
215 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
216
217 static int __init stop_trace_on_warning(char *str)
218 {
219 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
220 __disable_trace_on_warning = 1;
221 return 1;
222 }
223 __setup("traceoff_on_warning", stop_trace_on_warning);
224
225 static int __init boot_alloc_snapshot(char *str)
226 {
227 allocate_snapshot = true;
228 /* We also need the main ring buffer expanded */
229 ring_buffer_expanded = true;
230 return 1;
231 }
232 __setup("alloc_snapshot", boot_alloc_snapshot);
233
234
235 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
236
237 static int __init set_trace_boot_options(char *str)
238 {
239 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
240 return 1;
241 }
242 __setup("trace_options=", set_trace_boot_options);
243
244 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
245 static char *trace_boot_clock __initdata;
246
247 static int __init set_trace_boot_clock(char *str)
248 {
249 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
250 trace_boot_clock = trace_boot_clock_buf;
251 return 1;
252 }
253 __setup("trace_clock=", set_trace_boot_clock);
254
255 static int __init set_tracepoint_printk(char *str)
256 {
257 /* Ignore the "tp_printk_stop_on_boot" param */
258 if (*str == '_')
259 return 0;
260
261 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
262 tracepoint_printk = 1;
263 return 1;
264 }
265 __setup("tp_printk", set_tracepoint_printk);
266
267 static int __init set_tracepoint_printk_stop(char *str)
268 {
269 tracepoint_printk_stop_on_boot = true;
270 return 1;
271 }
272 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
273
274 unsigned long long ns2usecs(u64 nsec)
275 {
276 nsec += 500;
277 do_div(nsec, 1000);
278 return nsec;
279 }
280
281 static void
282 trace_process_export(struct trace_export *export,
283 struct ring_buffer_event *event, int flag)
284 {
285 struct trace_entry *entry;
286 unsigned int size = 0;
287
288 if (export->flags & flag) {
289 entry = ring_buffer_event_data(event);
290 size = ring_buffer_event_length(event);
291 export->write(export, entry, size);
292 }
293 }
294
295 static DEFINE_MUTEX(ftrace_export_lock);
296
297 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
298
299 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
300 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
301 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
302
303 static inline void ftrace_exports_enable(struct trace_export *export)
304 {
305 if (export->flags & TRACE_EXPORT_FUNCTION)
306 static_branch_inc(&trace_function_exports_enabled);
307
308 if (export->flags & TRACE_EXPORT_EVENT)
309 static_branch_inc(&trace_event_exports_enabled);
310
311 if (export->flags & TRACE_EXPORT_MARKER)
312 static_branch_inc(&trace_marker_exports_enabled);
313 }
314
315 static inline void ftrace_exports_disable(struct trace_export *export)
316 {
317 if (export->flags & TRACE_EXPORT_FUNCTION)
318 static_branch_dec(&trace_function_exports_enabled);
319
320 if (export->flags & TRACE_EXPORT_EVENT)
321 static_branch_dec(&trace_event_exports_enabled);
322
323 if (export->flags & TRACE_EXPORT_MARKER)
324 static_branch_dec(&trace_marker_exports_enabled);
325 }
326
327 static void ftrace_exports(struct ring_buffer_event *event, int flag)
328 {
329 struct trace_export *export;
330
331 preempt_disable_notrace();
332
333 export = rcu_dereference_raw_check(ftrace_exports_list);
334 while (export) {
335 trace_process_export(export, event, flag);
336 export = rcu_dereference_raw_check(export->next);
337 }
338
339 preempt_enable_notrace();
340 }
341
342 static inline void
343 add_trace_export(struct trace_export **list, struct trace_export *export)
344 {
345 rcu_assign_pointer(export->next, *list);
346 /*
347 * We are entering export into the list but another
348 * CPU might be walking that list. We need to make sure
349 * the export->next pointer is valid before another CPU sees
350 * the export pointer included into the list.
351 */
352 rcu_assign_pointer(*list, export);
353 }
354
355 static inline int
356 rm_trace_export(struct trace_export **list, struct trace_export *export)
357 {
358 struct trace_export **p;
359
360 for (p = list; *p != NULL; p = &(*p)->next)
361 if (*p == export)
362 break;
363
364 if (*p != export)
365 return -1;
366
367 rcu_assign_pointer(*p, (*p)->next);
368
369 return 0;
370 }
371
372 static inline void
373 add_ftrace_export(struct trace_export **list, struct trace_export *export)
374 {
375 ftrace_exports_enable(export);
376
377 add_trace_export(list, export);
378 }
379
380 static inline int
381 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
382 {
383 int ret;
384
385 ret = rm_trace_export(list, export);
386 ftrace_exports_disable(export);
387
388 return ret;
389 }
390
391 int register_ftrace_export(struct trace_export *export)
392 {
393 if (WARN_ON_ONCE(!export->write))
394 return -1;
395
396 mutex_lock(&ftrace_export_lock);
397
398 add_ftrace_export(&ftrace_exports_list, export);
399
400 mutex_unlock(&ftrace_export_lock);
401
402 return 0;
403 }
404 EXPORT_SYMBOL_GPL(register_ftrace_export);
405
406 int unregister_ftrace_export(struct trace_export *export)
407 {
408 int ret;
409
410 mutex_lock(&ftrace_export_lock);
411
412 ret = rm_ftrace_export(&ftrace_exports_list, export);
413
414 mutex_unlock(&ftrace_export_lock);
415
416 return ret;
417 }
418 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
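/*
 * Illustrative sketch (not part of this file) of how a module might hook
 * into the export mechanism above. The my_*() names are hypothetical;
 * only register_ftrace_export()/unregister_ftrace_export() and the
 * TRACE_EXPORT_* flags come from this API.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		(e.g. forward the raw trace entry to a device buffer)
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 *	In module init:	register_ftrace_export(&my_export);
 *	In module exit:	unregister_ftrace_export(&my_export);
 */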
419
420 /* trace_flags holds trace_options default values */
421 #define TRACE_DEFAULT_FLAGS \
422 (FUNCTION_DEFAULT_FLAGS | \
423 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
424 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
425 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
426 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
427 TRACE_ITER_HASH_PTR)
428
429 /* trace_options that are only supported by global_trace */
430 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
431 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
432
433 /* trace_flags that are default zero for instances */
434 #define ZEROED_TRACE_FLAGS \
435 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
436
437 /*
438 * The global_trace is the descriptor that holds the top-level tracing
439 * buffers for the live tracing.
440 */
441 static struct trace_array global_trace = {
442 .trace_flags = TRACE_DEFAULT_FLAGS,
443 };
444
445 LIST_HEAD(ftrace_trace_arrays);
446
447 int trace_array_get(struct trace_array *this_tr)
448 {
449 struct trace_array *tr;
450 int ret = -ENODEV;
451
452 mutex_lock(&trace_types_lock);
453 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
454 if (tr == this_tr) {
455 tr->ref++;
456 ret = 0;
457 break;
458 }
459 }
460 mutex_unlock(&trace_types_lock);
461
462 return ret;
463 }
464
465 static void __trace_array_put(struct trace_array *this_tr)
466 {
467 WARN_ON(!this_tr->ref);
468 this_tr->ref--;
469 }
470
471 /**
472 * trace_array_put - Decrement the reference counter for this trace array.
473 * @this_tr : pointer to the trace array
474 *
475 * NOTE: Use this when we no longer need the trace array returned by
476 * trace_array_get_by_name(). This ensures the trace array can be later
477 * destroyed.
478 *
479 */
480 void trace_array_put(struct trace_array *this_tr)
481 {
482 if (!this_tr)
483 return;
484
485 mutex_lock(&trace_types_lock);
486 __trace_array_put(this_tr);
487 mutex_unlock(&trace_types_lock);
488 }
489 EXPORT_SYMBOL_GPL(trace_array_put);
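/*
 * Illustrative sketch (not part of this file) of the get/put pairing the
 * comment above refers to. "my_instance" is a hypothetical instance name.
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENODEV;
 *
 *	trace_array_printk(tr, _THIS_IP_, "hello\n");
 *
 *	trace_array_put(tr);	(allows the instance to be destroyed later)
 */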
490
491 int tracing_check_open_get_tr(struct trace_array *tr)
492 {
493 int ret;
494
495 ret = security_locked_down(LOCKDOWN_TRACEFS);
496 if (ret)
497 return ret;
498
499 if (tracing_disabled)
500 return -ENODEV;
501
502 if (tr && trace_array_get(tr) < 0)
503 return -ENODEV;
504
505 return 0;
506 }
507
508 int call_filter_check_discard(struct trace_event_call *call, void *rec,
509 struct trace_buffer *buffer,
510 struct ring_buffer_event *event)
511 {
512 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
513 !filter_match_preds(call->filter, rec)) {
514 __trace_event_discard_commit(buffer, event);
515 return 1;
516 }
517
518 return 0;
519 }
520
521 /**
522 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
523 * @filtered_pids: The list of pids to check
524 * @search_pid: The PID to find in @filtered_pids
525 *
526 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
527 */
528 bool
529 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
530 {
531 return trace_pid_list_is_set(filtered_pids, search_pid);
532 }
533
534 /**
535 * trace_ignore_this_task - should a task be ignored for tracing
536 * @filtered_pids: The list of pids to check
537 * @filtered_no_pids: The list of pids not to be traced
538 * @task: The task that should be ignored if not filtered
539 *
540 * Checks if @task should be traced or not from @filtered_pids.
541 * Returns true if @task should *NOT* be traced.
542 * Returns false if @task should be traced.
543 */
544 bool
545 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
546 struct trace_pid_list *filtered_no_pids,
547 struct task_struct *task)
548 {
549 /*
550 * If filtered_no_pids is not empty, and the task's pid is listed
551 * in filtered_no_pids, then return true.
552 * Otherwise, if filtered_pids is empty, that means we can
553 * trace all tasks. If it has content, then only trace pids
554 * within filtered_pids.
555 */
556
557 return (filtered_pids &&
558 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
559 (filtered_no_pids &&
560 trace_find_filtered_pid(filtered_no_pids, task->pid));
561 }
562
563 /**
564 * trace_filter_add_remove_task - Add or remove a task from a pid_list
565 * @pid_list: The list to modify
566 * @self: The current task for fork or NULL for exit
567 * @task: The task to add or remove
568 *
569 * If adding a task, if @self is defined, the task is only added if @self
570 * is also included in @pid_list. This happens on fork and tasks should
571 * only be added when the parent is listed. If @self is NULL, then the
572 * @task pid will be removed from the list, which would happen on exit
573 * of a task.
574 */
575 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
576 struct task_struct *self,
577 struct task_struct *task)
578 {
579 if (!pid_list)
580 return;
581
582 /* For forks, we only add if the forking task is listed */
583 if (self) {
584 if (!trace_find_filtered_pid(pid_list, self->pid))
585 return;
586 }
587
588 /* "self" is set for forks, and NULL for exits */
589 if (self)
590 trace_pid_list_set(pid_list, task->pid);
591 else
592 trace_pid_list_clear(pid_list, task->pid);
593 }
594
595 /**
596 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
597 * @pid_list: The pid list to show
598 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
599 * @pos: The position of the file
600 *
601 * This is used by the seq_file "next" operation to iterate the pids
602 * listed in a trace_pid_list structure.
603 *
604 * Returns the pid+1 as we want to display pid of zero, but NULL would
605 * stop the iteration.
606 */
607 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
608 {
609 long pid = (unsigned long)v;
610 unsigned int next;
611
612 (*pos)++;
613
614 /* pid already is +1 of the actual previous bit */
615 if (trace_pid_list_next(pid_list, pid, &next) < 0)
616 return NULL;
617
618 pid = next;
619
620 /* Return pid + 1 to allow zero to be represented */
621 return (void *)(pid + 1);
622 }
623
624 /**
625 * trace_pid_start - Used for seq_file to start reading pid lists
626 * @pid_list: The pid list to show
627 * @pos: The position of the file
628 *
629 * This is used by seq_file "start" operation to start the iteration
630 * of listing pids.
631 *
632 * Returns the pid+1 as we want to display pid of zero, but NULL would
633 * stop the iteration.
634 */
635 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
636 {
637 unsigned long pid;
638 unsigned int first;
639 loff_t l = 0;
640
641 if (trace_pid_list_first(pid_list, &first) < 0)
642 return NULL;
643
644 pid = first;
645
646 /* Return pid + 1 so that zero can be the exit value */
647 for (pid++; pid && l < *pos;
648 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
649 ;
650 return (void *)pid;
651 }
652
653 /**
654 * trace_pid_show - show the current pid in seq_file processing
655 * @m: The seq_file structure to write into
656 * @v: A void pointer of the pid (+1) value to display
657 *
658 * Can be directly used by seq_file operations to display the current
659 * pid value.
660 */
661 int trace_pid_show(struct seq_file *m, void *v)
662 {
663 unsigned long pid = (unsigned long)v - 1;
664
665 seq_printf(m, "%lu\n", pid);
666 return 0;
667 }
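/*
 * Illustrative sketch (not part of this file) of wiring the three helpers
 * above into a seq_file interface, the way the pid filter files do. The
 * my_*() names and my_pid_list are hypothetical; locking and looking up
 * the pid list are up to the caller.
 *
 *	static void *my_pid_seq_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *my_pid_seq_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static void my_pid_seq_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= my_pid_seq_start,
 *		.next	= my_pid_seq_next,
 *		.stop	= my_pid_seq_stop,
 *		.show	= trace_pid_show,
 *	};
 */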
668
669 /* 128 should be much more than enough */
670 #define PID_BUF_SIZE 127
671
672 int trace_pid_write(struct trace_pid_list *filtered_pids,
673 struct trace_pid_list **new_pid_list,
674 const char __user *ubuf, size_t cnt)
675 {
676 struct trace_pid_list *pid_list;
677 struct trace_parser parser;
678 unsigned long val;
679 int nr_pids = 0;
680 ssize_t read = 0;
681 ssize_t ret;
682 loff_t pos;
683 pid_t pid;
684
685 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
686 return -ENOMEM;
687
688 /*
689 * The write is an all-or-nothing operation: always create a new
690 * array when the user adds new pids.
691 * If the operation fails, then the current list is
692 * not modified.
693 */
694 pid_list = trace_pid_list_alloc();
695 if (!pid_list) {
696 trace_parser_put(&parser);
697 return -ENOMEM;
698 }
699
700 if (filtered_pids) {
701 /* copy the current bits to the new max */
702 ret = trace_pid_list_first(filtered_pids, &pid);
703 while (!ret) {
704 trace_pid_list_set(pid_list, pid);
705 ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
706 nr_pids++;
707 }
708 }
709
710 ret = 0;
711 while (cnt > 0) {
712
713 pos = 0;
714
715 ret = trace_get_user(&parser, ubuf, cnt, &pos);
716 if (ret < 0)
717 break;
718
719 read += ret;
720 ubuf += ret;
721 cnt -= ret;
722
723 if (!trace_parser_loaded(&parser))
724 break;
725
726 ret = -EINVAL;
727 if (kstrtoul(parser.buffer, 0, &val))
728 break;
729
730 pid = (pid_t)val;
731
732 if (trace_pid_list_set(pid_list, pid) < 0) {
733 ret = -1;
734 break;
735 }
736 nr_pids++;
737
738 trace_parser_clear(&parser);
739 ret = 0;
740 }
741 trace_parser_put(&parser);
742
743 if (ret < 0) {
744 trace_pid_list_free(pid_list);
745 return ret;
746 }
747
748 if (!nr_pids) {
749 /* Cleared the list of pids */
750 trace_pid_list_free(pid_list);
751 pid_list = NULL;
752 }
753
754 *new_pid_list = pid_list;
755
756 return read;
757 }
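/*
 * Illustrative sketch (not part of this file) of how a caller typically
 * installs the list built by trace_pid_write(): publish the new list with
 * rcu_assign_pointer(), wait for readers, then free the old one. The
 * filtered_pids pointer used here is hypothetical.
 *
 *	struct trace_pid_list *new_list = NULL;
 *	int ret;
 *
 *	ret = trace_pid_write(filtered_pids, &new_list, ubuf, cnt);
 *	if (ret < 0)
 *		return ret;
 *
 *	rcu_assign_pointer(tr->filtered_pids, new_list);
 *	synchronize_rcu();
 *	trace_pid_list_free(filtered_pids);
 */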
758
759 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
760 {
761 u64 ts;
762
763 /* Early boot up does not have a buffer yet */
764 if (!buf->buffer)
765 return trace_clock_local();
766
767 ts = ring_buffer_time_stamp(buf->buffer);
768 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
769
770 return ts;
771 }
772
773 u64 ftrace_now(int cpu)
774 {
775 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
776 }
777
778 /**
779 * tracing_is_enabled - Show if global_trace has been enabled
780 *
781 * Shows if the global trace has been enabled or not. It uses the
782 * mirror flag "buffer_disabled" to be used in fast paths such as for
783 * the irqsoff tracer. But it may be inaccurate due to races. If you
784 * need to know the accurate state, use tracing_is_on() which is a little
785 * slower, but accurate.
786 */
787 int tracing_is_enabled(void)
788 {
789 /*
790 * For quick access (irqsoff uses this in fast path), just
791 * return the mirror variable of the state of the ring buffer.
792 * It's a little racy, but we don't really care.
793 */
794 smp_rmb();
795 return !global_trace.buffer_disabled;
796 }
797
798 /*
799 * trace_buf_size is the size in bytes that is allocated
800 * for a buffer. Note, the number of bytes is always rounded
801 * to page size.
802 *
803 * This number is purposely set to a low number of 16384.
804 * If the dump on oops happens, it will be much appreciated
805 * to not have to wait for all that output. Anyway this can be
806 * boot time and run time configurable.
807 */
808 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
809
810 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
811
812 /* trace_types holds a link list of available tracers. */
813 static struct tracer *trace_types __read_mostly;
814
815 /*
816 * trace_types_lock is used to protect the trace_types list.
817 */
818 DEFINE_MUTEX(trace_types_lock);
819
820 /*
821 * serialize access to the ring buffer
822 *
823 * The ring buffer serializes readers, but that is only low level protection.
824 * The validity of events (returned by ring_buffer_peek() etc.)
825 * is not protected by the ring buffer.
826 *
827 * The content of events may become garbage if we allow other processes to
828 * consume these events concurrently:
829 * A) the page of the consumed events may become a normal page
830 * (not a reader page) in the ring buffer, and this page will be rewritten
831 * by the events producer.
832 * B) The page of the consumed events may become a page for splice_read,
833 * and this page will be returned to the system.
834 *
835 * These primitives allow multiple processes to access different CPU ring
836 * buffers concurrently.
837 *
838 * These primitives don't distinguish read-only and read-consume access.
839 * Multiple read-only accesses are also serialized.
840 */
841
842 #ifdef CONFIG_SMP
843 static DECLARE_RWSEM(all_cpu_access_lock);
844 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
845
846 static inline void trace_access_lock(int cpu)
847 {
848 if (cpu == RING_BUFFER_ALL_CPUS) {
849 /* gain it for accessing the whole ring buffer. */
850 down_write(&all_cpu_access_lock);
851 } else {
852 /* gain it for accessing a cpu ring buffer. */
853
854 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
855 down_read(&all_cpu_access_lock);
856
857 /* Secondly block other access to this @cpu ring buffer. */
858 mutex_lock(&per_cpu(cpu_access_lock, cpu));
859 }
860 }
861
862 static inline void trace_access_unlock(int cpu)
863 {
864 if (cpu == RING_BUFFER_ALL_CPUS) {
865 up_write(&all_cpu_access_lock);
866 } else {
867 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
868 up_read(&all_cpu_access_lock);
869 }
870 }
871
872 static inline void trace_access_lock_init(void)
873 {
874 int cpu;
875
876 for_each_possible_cpu(cpu)
877 mutex_init(&per_cpu(cpu_access_lock, cpu));
878 }
879
880 #else
881
882 static DEFINE_MUTEX(access_lock);
883
884 static inline void trace_access_lock(int cpu)
885 {
886 (void)cpu;
887 mutex_lock(&access_lock);
888 }
889
890 static inline void trace_access_unlock(int cpu)
891 {
892 (void)cpu;
893 mutex_unlock(&access_lock);
894 }
895
896 static inline void trace_access_lock_init(void)
897 {
898 }
899
900 #endif
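/*
 * Illustrative sketch (not part of this file) of how the locking above is
 * used by a consuming reader: take the per-cpu (or all-CPU) access lock
 * around the consume, then release it.
 *
 *	trace_access_lock(cpu_file);
 *	(... consume or splice events from the @cpu_file ring buffer ...)
 *	trace_access_unlock(cpu_file);
 *
 * Passing RING_BUFFER_ALL_CPUS takes the lock for every CPU buffer at once.
 */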
901
902 #ifdef CONFIG_STACKTRACE
903 static void __ftrace_trace_stack(struct trace_buffer *buffer,
904 unsigned int trace_ctx,
905 int skip, struct pt_regs *regs);
906 static inline void ftrace_trace_stack(struct trace_array *tr,
907 struct trace_buffer *buffer,
908 unsigned int trace_ctx,
909 int skip, struct pt_regs *regs);
910
911 #else
912 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
913 unsigned int trace_ctx,
914 int skip, struct pt_regs *regs)
915 {
916 }
917 static inline void ftrace_trace_stack(struct trace_array *tr,
918 struct trace_buffer *buffer,
919 unsigned long trace_ctx,
920 int skip, struct pt_regs *regs)
921 {
922 }
923
924 #endif
925
926 static __always_inline void
927 trace_event_setup(struct ring_buffer_event *event,
928 int type, unsigned int trace_ctx)
929 {
930 struct trace_entry *ent = ring_buffer_event_data(event);
931
932 tracing_generic_entry_update(ent, type, trace_ctx);
933 }
934
935 static __always_inline struct ring_buffer_event *
936 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
937 int type,
938 unsigned long len,
939 unsigned int trace_ctx)
940 {
941 struct ring_buffer_event *event;
942
943 event = ring_buffer_lock_reserve(buffer, len);
944 if (event != NULL)
945 trace_event_setup(event, type, trace_ctx);
946
947 return event;
948 }
949
950 void tracer_tracing_on(struct trace_array *tr)
951 {
952 if (tr->array_buffer.buffer)
953 ring_buffer_record_on(tr->array_buffer.buffer);
954 /*
955 * This flag is looked at when buffers haven't been allocated
956 * yet, or by some tracers (like irqsoff), that just want to
957 * know if the ring buffer has been disabled, but it can handle
958 * races of where it gets disabled but we still do a record.
959 * As the check is in the fast path of the tracers, it is more
960 * important to be fast than accurate.
961 */
962 tr->buffer_disabled = 0;
963 /* Make the flag seen by readers */
964 smp_wmb();
965 }
966
967 /**
968 * tracing_on - enable tracing buffers
969 *
970 * This function enables tracing buffers that may have been
971 * disabled with tracing_off.
972 */
973 void tracing_on(void)
974 {
975 tracer_tracing_on(&global_trace);
976 }
977 EXPORT_SYMBOL_GPL(tracing_on);
978
979
980 static __always_inline void
981 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
982 {
983 __this_cpu_write(trace_taskinfo_save, true);
984
985 /* If this is the temp buffer, we need to commit fully */
986 if (this_cpu_read(trace_buffered_event) == event) {
987 /* Length is in event->array[0] */
988 ring_buffer_write(buffer, event->array[0], &event->array[1]);
989 /* Release the temp buffer */
990 this_cpu_dec(trace_buffered_event_cnt);
991 } else
992 ring_buffer_unlock_commit(buffer, event);
993 }
994
995 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
996 const char *str, int size)
997 {
998 struct ring_buffer_event *event;
999 struct trace_buffer *buffer;
1000 struct print_entry *entry;
1001 unsigned int trace_ctx;
1002 int alloc;
1003
1004 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1005 return 0;
1006
1007 if (unlikely(tracing_selftest_running || tracing_disabled))
1008 return 0;
1009
1010 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1011
1012 trace_ctx = tracing_gen_ctx();
1013 buffer = tr->array_buffer.buffer;
1014 ring_buffer_nest_start(buffer);
1015 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1016 trace_ctx);
1017 if (!event) {
1018 size = 0;
1019 goto out;
1020 }
1021
1022 entry = ring_buffer_event_data(event);
1023 entry->ip = ip;
1024
1025 memcpy(&entry->buf, str, size);
1026
1027 /* Add a newline if necessary */
1028 if (entry->buf[size - 1] != '\n') {
1029 entry->buf[size] = '\n';
1030 entry->buf[size + 1] = '\0';
1031 } else
1032 entry->buf[size] = '\0';
1033
1034 __buffer_unlock_commit(buffer, event);
1035 ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1036 out:
1037 ring_buffer_nest_end(buffer);
1038 return size;
1039 }
1040 EXPORT_SYMBOL_GPL(__trace_array_puts);
1041
1042 /**
1043 * __trace_puts - write a constant string into the trace buffer.
1044 * @ip: The address of the caller
1045 * @str: The constant string to write
1046 * @size: The size of the string.
1047 */
1048 int __trace_puts(unsigned long ip, const char *str, int size)
1049 {
1050 return __trace_array_puts(&global_trace, ip, str, size);
1051 }
1052 EXPORT_SYMBOL_GPL(__trace_puts);
1053
1054 /**
1055 * __trace_bputs - write the pointer to a constant string into trace buffer
1056 * @ip: The address of the caller
1057 * @str: The constant string to write to the buffer to
1058 */
1059 int __trace_bputs(unsigned long ip, const char *str)
1060 {
1061 struct ring_buffer_event *event;
1062 struct trace_buffer *buffer;
1063 struct bputs_entry *entry;
1064 unsigned int trace_ctx;
1065 int size = sizeof(struct bputs_entry);
1066 int ret = 0;
1067
1068 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1069 return 0;
1070
1071 if (unlikely(tracing_selftest_running || tracing_disabled))
1072 return 0;
1073
1074 trace_ctx = tracing_gen_ctx();
1075 buffer = global_trace.array_buffer.buffer;
1076
1077 ring_buffer_nest_start(buffer);
1078 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1079 trace_ctx);
1080 if (!event)
1081 goto out;
1082
1083 entry = ring_buffer_event_data(event);
1084 entry->ip = ip;
1085 entry->str = str;
1086
1087 __buffer_unlock_commit(buffer, event);
1088 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1089
1090 ret = 1;
1091 out:
1092 ring_buffer_nest_end(buffer);
1093 return ret;
1094 }
1095 EXPORT_SYMBOL_GPL(__trace_bputs);
1096
1097 #ifdef CONFIG_TRACER_SNAPSHOT
1098 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1099 void *cond_data)
1100 {
1101 struct tracer *tracer = tr->current_trace;
1102 unsigned long flags;
1103
1104 if (in_nmi()) {
1105 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1106 trace_array_puts(tr, "*** snapshot is being ignored ***\n");
1107 return;
1108 }
1109
1110 if (!tr->allocated_snapshot) {
1111 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1112 trace_array_puts(tr, "*** stopping trace here! ***\n");
1113 tracer_tracing_off(tr);
1114 return;
1115 }
1116
1117 /* Note, snapshot can not be used when the tracer uses it */
1118 if (tracer->use_max_tr) {
1119 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1120 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1121 return;
1122 }
1123
1124 local_irq_save(flags);
1125 update_max_tr(tr, current, smp_processor_id(), cond_data);
1126 local_irq_restore(flags);
1127 }
1128
1129 void tracing_snapshot_instance(struct trace_array *tr)
1130 {
1131 tracing_snapshot_instance_cond(tr, NULL);
1132 }
1133
1134 /**
1135 * tracing_snapshot - take a snapshot of the current buffer.
1136 *
1137 * This causes a swap between the snapshot buffer and the current live
1138 * tracing buffer. You can use this to take snapshots of the live
1139 * trace when some condition is triggered, but continue to trace.
1140 *
1141 * Note, make sure to allocate the snapshot with either
1142 * a tracing_snapshot_alloc(), or by doing it manually
1143 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1144 *
1145 * If the snapshot buffer is not allocated, it will stop tracing.
1146 * Basically making a permanent snapshot.
1147 */
1148 void tracing_snapshot(void)
1149 {
1150 struct trace_array *tr = &global_trace;
1151
1152 tracing_snapshot_instance(tr);
1153 }
1154 EXPORT_SYMBOL_GPL(tracing_snapshot);
1155
1156 /**
1157 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1158 * @tr: The tracing instance to snapshot
1159 * @cond_data: The data to be tested conditionally, and possibly saved
1160 *
1161 * This is the same as tracing_snapshot() except that the snapshot is
1162 * conditional - the snapshot will only happen if the
1163 * cond_snapshot.update() implementation receiving the cond_data
1164 * returns true, which means that the trace array's cond_snapshot
1165 * update() operation used the cond_data to determine whether the
1166 * snapshot should be taken, and if it was, presumably saved it along
1167 * with the snapshot.
1168 */
1169 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1170 {
1171 tracing_snapshot_instance_cond(tr, cond_data);
1172 }
1173 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1174
1175 /**
1176 * tracing_snapshot_cond_data - get the user data associated with a snapshot
1177 * @tr: The tracing instance
1178 *
1179 * When the user enables a conditional snapshot using
1180 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1181 * with the snapshot. This accessor is used to retrieve it.
1182 *
1183 * Should not be called from cond_snapshot.update(), since it takes
1184 * the tr->max_lock lock, which the code calling
1185 * cond_snapshot.update() has already taken.
1186 *
1187 * Returns the cond_data associated with the trace array's snapshot.
1188 */
1189 void *tracing_cond_snapshot_data(struct trace_array *tr)
1190 {
1191 void *cond_data = NULL;
1192
1193 local_irq_disable();
1194 arch_spin_lock(&tr->max_lock);
1195
1196 if (tr->cond_snapshot)
1197 cond_data = tr->cond_snapshot->cond_data;
1198
1199 arch_spin_unlock(&tr->max_lock);
1200 local_irq_enable();
1201
1202 return cond_data;
1203 }
1204 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1205
1206 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1207 struct array_buffer *size_buf, int cpu_id);
1208 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1209
1210 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1211 {
1212 int ret;
1213
1214 if (!tr->allocated_snapshot) {
1215
1216 /* allocate spare buffer */
1217 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1218 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1219 if (ret < 0)
1220 return ret;
1221
1222 tr->allocated_snapshot = true;
1223 }
1224
1225 return 0;
1226 }
1227
1228 static void free_snapshot(struct trace_array *tr)
1229 {
1230 /*
1231 * We don't free the ring buffer; instead, we resize it because
1232 * the max_tr ring buffer has some state (e.g. ring->clock) and
1233 * we want to preserve it.
1234 */
1235 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1236 set_buffer_entries(&tr->max_buffer, 1);
1237 tracing_reset_online_cpus(&tr->max_buffer);
1238 tr->allocated_snapshot = false;
1239 }
1240
1241 /**
1242 * tracing_alloc_snapshot - allocate snapshot buffer.
1243 *
1244 * This only allocates the snapshot buffer if it isn't already
1245 * allocated - it doesn't also take a snapshot.
1246 *
1247 * This is meant to be used in cases where the snapshot buffer needs
1248 * to be set up for events that can't sleep but need to be able to
1249 * trigger a snapshot.
1250 */
1251 int tracing_alloc_snapshot(void)
1252 {
1253 struct trace_array *tr = &global_trace;
1254 int ret;
1255
1256 ret = tracing_alloc_snapshot_instance(tr);
1257 WARN_ON(ret < 0);
1258
1259 return ret;
1260 }
1261 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1262
1263 /**
1264 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1265 *
1266 * This is similar to tracing_snapshot(), but it will allocate the
1267 * snapshot buffer if it isn't already allocated. Use this only
1268 * where it is safe to sleep, as the allocation may sleep.
1269 *
1270 * This causes a swap between the snapshot buffer and the current live
1271 * tracing buffer. You can use this to take snapshots of the live
1272 * trace when some condition is triggered, but continue to trace.
1273 */
1274 void tracing_snapshot_alloc(void)
1275 {
1276 int ret;
1277
1278 ret = tracing_alloc_snapshot();
1279 if (ret < 0)
1280 return;
1281
1282 tracing_snapshot();
1283 }
1284 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
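/*
 * Illustrative sketch (not part of this file) of the intended usage:
 * allocate the snapshot buffer where sleeping is allowed, then trigger
 * snapshots from contexts that cannot sleep. my_condition_hit() is
 * hypothetical.
 *
 *	(early, in a context that may sleep)
 *	if (tracing_alloc_snapshot() < 0)
 *		return;
 *
 *	(later, e.g. from an event handler)
 *	if (my_condition_hit())
 *		tracing_snapshot();
 */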
1285
1286 /**
1287 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1288 * @tr: The tracing instance
1289 * @cond_data: User data to associate with the snapshot
1290 * @update: Implementation of the cond_snapshot update function
1291 *
1292 * Check whether the conditional snapshot for the given instance has
1293 * already been enabled, or if the current tracer is already using a
1294 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1295 * save the cond_data and update function inside.
1296 *
1297 * Returns 0 if successful, error otherwise.
1298 */
1299 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1300 cond_update_fn_t update)
1301 {
1302 struct cond_snapshot *cond_snapshot;
1303 int ret = 0;
1304
1305 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1306 if (!cond_snapshot)
1307 return -ENOMEM;
1308
1309 cond_snapshot->cond_data = cond_data;
1310 cond_snapshot->update = update;
1311
1312 mutex_lock(&trace_types_lock);
1313
1314 ret = tracing_alloc_snapshot_instance(tr);
1315 if (ret)
1316 goto fail_unlock;
1317
1318 if (tr->current_trace->use_max_tr) {
1319 ret = -EBUSY;
1320 goto fail_unlock;
1321 }
1322
1323 /*
1324 * The cond_snapshot can only change to NULL without the
1325 * trace_types_lock. We don't care if we race with it going
1326 * to NULL, but we want to make sure that it's not set to
1327 * something other than NULL when we get here, which we can
1328 * do safely with only holding the trace_types_lock and not
1329 * having to take the max_lock.
1330 */
1331 if (tr->cond_snapshot) {
1332 ret = -EBUSY;
1333 goto fail_unlock;
1334 }
1335
1336 local_irq_disable();
1337 arch_spin_lock(&tr->max_lock);
1338 tr->cond_snapshot = cond_snapshot;
1339 arch_spin_unlock(&tr->max_lock);
1340 local_irq_enable();
1341
1342 mutex_unlock(&trace_types_lock);
1343
1344 return ret;
1345
1346 fail_unlock:
1347 mutex_unlock(&trace_types_lock);
1348 kfree(cond_snapshot);
1349 return ret;
1350 }
1351 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1352
1353 /**
1354 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1355 * @tr: The tracing instance
1356 *
1357 * Check whether the conditional snapshot for the given instance is
1358 * enabled; if so, free the cond_snapshot associated with it,
1359 * otherwise return -EINVAL.
1360 *
1361 * Returns 0 if successful, error otherwise.
1362 */
1363 int tracing_snapshot_cond_disable(struct trace_array *tr)
1364 {
1365 int ret = 0;
1366
1367 local_irq_disable();
1368 arch_spin_lock(&tr->max_lock);
1369
1370 if (!tr->cond_snapshot)
1371 ret = -EINVAL;
1372 else {
1373 kfree(tr->cond_snapshot);
1374 tr->cond_snapshot = NULL;
1375 }
1376
1377 arch_spin_unlock(&tr->max_lock);
1378 local_irq_enable();
1379
1380 return ret;
1381 }
1382 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
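/*
 * Illustrative sketch (not part of this file) of a conditional snapshot
 * user: the update() callback decides, under tr->max_lock, whether the
 * snapshot should actually be taken. All my_*() names are hypothetical.
 *
 *	static bool my_snapshot_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *state = cond_data;
 *
 *		return READ_ONCE(state->threshold_crossed);
 *	}
 *
 *	Enable once:		tracing_snapshot_cond_enable(tr, &my_state,
 *							     my_snapshot_update);
 *	On interesting events:	tracing_snapshot_cond(tr, &my_state);
 *	Tear down:		tracing_snapshot_cond_disable(tr);
 */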
1383 #else
1384 void tracing_snapshot(void)
1385 {
1386 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1387 }
1388 EXPORT_SYMBOL_GPL(tracing_snapshot);
1389 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1390 {
1391 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1392 }
1393 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1394 int tracing_alloc_snapshot(void)
1395 {
1396 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1397 return -ENODEV;
1398 }
1399 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1400 void tracing_snapshot_alloc(void)
1401 {
1402 /* Give warning */
1403 tracing_snapshot();
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1406 void *tracing_cond_snapshot_data(struct trace_array *tr)
1407 {
1408 return NULL;
1409 }
1410 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1411 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1412 {
1413 return -ENODEV;
1414 }
1415 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1416 int tracing_snapshot_cond_disable(struct trace_array *tr)
1417 {
1418 return false;
1419 }
1420 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1421 #define free_snapshot(tr) do { } while (0)
1422 #endif /* CONFIG_TRACER_SNAPSHOT */
1423
1424 void tracer_tracing_off(struct trace_array *tr)
1425 {
1426 if (tr->array_buffer.buffer)
1427 ring_buffer_record_off(tr->array_buffer.buffer);
1428 /*
1429 * This flag is looked at when buffers haven't been allocated
1430 * yet, or by some tracers (like irqsoff), that just want to
1431 * know if the ring buffer has been disabled, but it can handle
1432 * races of where it gets disabled but we still do a record.
1433 * As the check is in the fast path of the tracers, it is more
1434 * important to be fast than accurate.
1435 */
1436 tr->buffer_disabled = 1;
1437 /* Make the flag seen by readers */
1438 smp_wmb();
1439 }
1440
1441 /**
1442 * tracing_off - turn off tracing buffers
1443 *
1444 * This function stops the tracing buffers from recording data.
1445 * It does not disable any overhead the tracers themselves may
1446 * be causing. This function simply causes all recording to
1447 * the ring buffers to fail.
1448 */
1449 void tracing_off(void)
1450 {
1451 tracer_tracing_off(&global_trace);
1452 }
1453 EXPORT_SYMBOL_GPL(tracing_off);
1454
1455 void disable_trace_on_warning(void)
1456 {
1457 if (__disable_trace_on_warning) {
1458 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1459 "Disabling tracing due to warning\n");
1460 tracing_off();
1461 }
1462 }
1463
1464 /**
1465 * tracer_tracing_is_on - show real state of ring buffer enabled
1466 * @tr : the trace array to know if ring buffer is enabled
1467 *
1468 * Shows real state of the ring buffer if it is enabled or not.
1469 */
1470 bool tracer_tracing_is_on(struct trace_array *tr)
1471 {
1472 if (tr->array_buffer.buffer)
1473 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1474 return !tr->buffer_disabled;
1475 }
1476
1477 /**
1478 * tracing_is_on - show state of ring buffers enabled
1479 */
1480 int tracing_is_on(void)
1481 {
1482 return tracer_tracing_is_on(&global_trace);
1483 }
1484 EXPORT_SYMBOL_GPL(tracing_is_on);
1485
1486 static int __init set_buf_size(char *str)
1487 {
1488 unsigned long buf_size;
1489
1490 if (!str)
1491 return 0;
1492 buf_size = memparse(str, &str);
1493 /*
1494 * nr_entries can not be zero and the startup
1495 * tests require some buffer space. Therefore
1496 * ensure we have at least 4096 bytes of buffer.
1497 */
1498 trace_buf_size = max(4096UL, buf_size);
1499 return 1;
1500 }
1501 __setup("trace_buf_size=", set_buf_size);
1502
1503 static int __init set_tracing_thresh(char *str)
1504 {
1505 unsigned long threshold;
1506 int ret;
1507
1508 if (!str)
1509 return 0;
1510 ret = kstrtoul(str, 0, &threshold);
1511 if (ret < 0)
1512 return 0;
1513 tracing_thresh = threshold * 1000;
1514 return 1;
1515 }
1516 __setup("tracing_thresh=", set_tracing_thresh);
1517
1518 unsigned long nsecs_to_usecs(unsigned long nsecs)
1519 {
1520 return nsecs / 1000;
1521 }
1522
1523 /*
1524 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1525 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1526 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1527 * of strings in the order that the evals (enum) were defined.
1528 */
1529 #undef C
1530 #define C(a, b) b
1531
1532 /* These must match the bit positions in trace_iterator_flags */
1533 static const char *trace_options[] = {
1534 TRACE_FLAGS
1535 NULL
1536 };
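/*
 * Illustrative sketch (not part of this file) of the C(a, b) trick used
 * above, with hypothetical flags: the same list expands once into enum
 * names and once into the matching strings, so the two always stay in sync.
 *
 *	#define MY_FLAGS		\
 *		C(FOO, "foo"),		\
 *		C(BAR, "bar"),
 *
 *	#undef C
 *	#define C(a, b) MY_ITER_##a##_BIT
 *	enum { MY_FLAGS };	(MY_ITER_FOO_BIT, MY_ITER_BAR_BIT)
 *
 *	#undef C
 *	#define C(a, b) b
 *	static const char *my_options[] = { MY_FLAGS NULL };
 */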
1537
1538 static struct {
1539 u64 (*func)(void);
1540 const char *name;
1541 int in_ns; /* is this clock in nanoseconds? */
1542 } trace_clocks[] = {
1543 { trace_clock_local, "local", 1 },
1544 { trace_clock_global, "global", 1 },
1545 { trace_clock_counter, "counter", 0 },
1546 { trace_clock_jiffies, "uptime", 0 },
1547 { trace_clock, "perf", 1 },
1548 { ktime_get_mono_fast_ns, "mono", 1 },
1549 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1550 { ktime_get_boot_fast_ns, "boot", 1 },
1551 ARCH_TRACE_CLOCKS
1552 };
1553
1554 bool trace_clock_in_ns(struct trace_array *tr)
1555 {
1556 if (trace_clocks[tr->clock_id].in_ns)
1557 return true;
1558
1559 return false;
1560 }
1561
1562 /*
1563 * trace_parser_get_init - gets the buffer for trace parser
1564 */
1565 int trace_parser_get_init(struct trace_parser *parser, int size)
1566 {
1567 memset(parser, 0, sizeof(*parser));
1568
1569 parser->buffer = kmalloc(size, GFP_KERNEL);
1570 if (!parser->buffer)
1571 return 1;
1572
1573 parser->size = size;
1574 return 0;
1575 }
1576
1577 /*
1578 * trace_parser_put - frees the buffer for trace parser
1579 */
1580 void trace_parser_put(struct trace_parser *parser)
1581 {
1582 kfree(parser->buffer);
1583 parser->buffer = NULL;
1584 }
1585
1586 /*
1587 * trace_get_user - reads the user input string separated by space
1588 * (matched by isspace(ch))
1589 *
1590 * For each string found the 'struct trace_parser' is updated,
1591 * and the function returns.
1592 *
1593 * Returns number of bytes read.
1594 *
1595 * See kernel/trace/trace.h for 'struct trace_parser' details.
1596 */
1597 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1598 size_t cnt, loff_t *ppos)
1599 {
1600 char ch;
1601 size_t read = 0;
1602 ssize_t ret;
1603
1604 if (!*ppos)
1605 trace_parser_clear(parser);
1606
1607 ret = get_user(ch, ubuf++);
1608 if (ret)
1609 goto out;
1610
1611 read++;
1612 cnt--;
1613
1614 /*
1615 * The parser is not finished with the last write,
1616 * continue reading the user input without skipping spaces.
1617 */
1618 if (!parser->cont) {
1619 /* skip white space */
1620 while (cnt && isspace(ch)) {
1621 ret = get_user(ch, ubuf++);
1622 if (ret)
1623 goto out;
1624 read++;
1625 cnt--;
1626 }
1627
1628 parser->idx = 0;
1629
1630 /* only spaces were written */
1631 if (isspace(ch) || !ch) {
1632 *ppos += read;
1633 ret = read;
1634 goto out;
1635 }
1636 }
1637
1638 /* read the non-space input */
1639 while (cnt && !isspace(ch) && ch) {
1640 if (parser->idx < parser->size - 1)
1641 parser->buffer[parser->idx++] = ch;
1642 else {
1643 ret = -EINVAL;
1644 goto out;
1645 }
1646 ret = get_user(ch, ubuf++);
1647 if (ret)
1648 goto out;
1649 read++;
1650 cnt--;
1651 }
1652
1653 /* We either got finished input or we have to wait for another call. */
1654 if (isspace(ch) || !ch) {
1655 parser->buffer[parser->idx] = 0;
1656 parser->cont = false;
1657 } else if (parser->idx < parser->size - 1) {
1658 parser->cont = true;
1659 parser->buffer[parser->idx++] = ch;
1660 /* Make sure the parsed string always terminates with '\0'. */
1661 parser->buffer[parser->idx] = 0;
1662 } else {
1663 ret = -EINVAL;
1664 goto out;
1665 }
1666
1667 *ppos += read;
1668 ret = read;
1669
1670 out:
1671 return ret;
1672 }
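/*
 * Illustrative sketch (not part of this file) of the usual caller loop for
 * trace_get_user(), as used by trace_pid_write() earlier in this file: pull
 * one token at a time into the parser until the user buffer is drained.
 * MY_BUF_SIZE is hypothetical.
 *
 *	struct trace_parser parser;
 *	loff_t pos;
 *	ssize_t ret;
 *
 *	if (trace_parser_get_init(&parser, MY_BUF_SIZE + 1))
 *		return -ENOMEM;
 *
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *
 *		ubuf += ret;
 *		cnt -= ret;
 *
 *		(parser.buffer now holds one NUL-terminated token)
 *		trace_parser_clear(&parser);
 *	}
 *	trace_parser_put(&parser);
 */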
1673
1674 /* TODO add a seq_buf_to_buffer() */
1675 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1676 {
1677 int len;
1678
1679 if (trace_seq_used(s) <= s->seq.readpos)
1680 return -EBUSY;
1681
1682 len = trace_seq_used(s) - s->seq.readpos;
1683 if (cnt > len)
1684 cnt = len;
1685 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1686
1687 s->seq.readpos += cnt;
1688 return cnt;
1689 }
1690
1691 unsigned long __read_mostly tracing_thresh;
1692
1693 #ifdef CONFIG_TRACER_MAX_TRACE
1694 static const struct file_operations tracing_max_lat_fops;
1695
1696 #ifdef LATENCY_FS_NOTIFY
1697
1698 static struct workqueue_struct *fsnotify_wq;
1699
1700 static void latency_fsnotify_workfn(struct work_struct *work)
1701 {
1702 struct trace_array *tr = container_of(work, struct trace_array,
1703 fsnotify_work);
1704 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1705 }
1706
1707 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1708 {
1709 struct trace_array *tr = container_of(iwork, struct trace_array,
1710 fsnotify_irqwork);
1711 queue_work(fsnotify_wq, &tr->fsnotify_work);
1712 }
1713
1714 static void trace_create_maxlat_file(struct trace_array *tr,
1715 struct dentry *d_tracer)
1716 {
1717 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1718 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1719 tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1720 d_tracer, tr,
1721 &tracing_max_lat_fops);
1722 }
1723
1724 __init static int latency_fsnotify_init(void)
1725 {
1726 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1727 WQ_UNBOUND | WQ_HIGHPRI, 0);
1728 if (!fsnotify_wq) {
1729 pr_err("Unable to allocate tr_max_lat_wq\n");
1730 return -ENOMEM;
1731 }
1732 return 0;
1733 }
1734
1735 late_initcall_sync(latency_fsnotify_init);
1736
1737 void latency_fsnotify(struct trace_array *tr)
1738 {
1739 if (!fsnotify_wq)
1740 return;
1741 /*
1742 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1743 * possible that we are called from __schedule() or do_idle(), which
1744 * could cause a deadlock.
1745 */
1746 irq_work_queue(&tr->fsnotify_irqwork);
1747 }
1748
1749 #else /* !LATENCY_FS_NOTIFY */
1750
1751 #define trace_create_maxlat_file(tr, d_tracer) \
1752 trace_create_file("tracing_max_latency", 0644, \
1753 d_tracer, tr, &tracing_max_lat_fops)
1754
1755 #endif
1756
1757 /*
1758 * Copy the new maximum trace into the separate maximum-trace
1759 * structure. (this way the maximum trace is permanently saved,
1760 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1761 */
1762 static void
1763 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1764 {
1765 struct array_buffer *trace_buf = &tr->array_buffer;
1766 struct array_buffer *max_buf = &tr->max_buffer;
1767 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1768 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1769
1770 max_buf->cpu = cpu;
1771 max_buf->time_start = data->preempt_timestamp;
1772
1773 max_data->saved_latency = tr->max_latency;
1774 max_data->critical_start = data->critical_start;
1775 max_data->critical_end = data->critical_end;
1776
1777 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1778 max_data->pid = tsk->pid;
1779 /*
1780 * If tsk == current, then use current_uid(), as that does not use
1781 * RCU. The irq tracer can be called out of RCU scope.
1782 */
1783 if (tsk == current)
1784 max_data->uid = current_uid();
1785 else
1786 max_data->uid = task_uid(tsk);
1787
1788 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1789 max_data->policy = tsk->policy;
1790 max_data->rt_priority = tsk->rt_priority;
1791
1792 /* record this tasks comm */
1793 tracing_record_cmdline(tsk);
1794 latency_fsnotify(tr);
1795 }
1796
1797 /**
1798 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1799 * @tr: tracer
1800 * @tsk: the task with the latency
1801 * @cpu: The cpu that initiated the trace.
1802 * @cond_data: User data associated with a conditional snapshot
1803 *
1804 * Flip the buffers between the @tr and the max_tr and record information
1805 * about which task was the cause of this latency.
1806 */
1807 void
1808 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1809 void *cond_data)
1810 {
1811 if (tr->stop_count)
1812 return;
1813
1814 WARN_ON_ONCE(!irqs_disabled());
1815
1816 if (!tr->allocated_snapshot) {
1817 /* Only the nop tracer should hit this when disabling */
1818 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1819 return;
1820 }
1821
1822 arch_spin_lock(&tr->max_lock);
1823
1824 /* Inherit the recordable setting from array_buffer */
1825 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1826 ring_buffer_record_on(tr->max_buffer.buffer);
1827 else
1828 ring_buffer_record_off(tr->max_buffer.buffer);
1829
1830 #ifdef CONFIG_TRACER_SNAPSHOT
1831 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1832 arch_spin_unlock(&tr->max_lock);
1833 return;
1834 }
1835 #endif
1836 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1837
1838 __update_max_tr(tr, tsk, cpu);
1839
1840 arch_spin_unlock(&tr->max_lock);
1841
1842 /* Any waiters on the old snapshot buffer need to wake up */
1843 ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1844 }
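/*
 * Illustrative caller, roughly what a latency tracer (e.g. the wakeup
 * tracer) does when it sees a new worst case; the variable names are
 * made up for this sketch:
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 *
 * The operation is cheap because only the buffer pointers are swapped;
 * no trace data is copied.
 */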
1845
1846 /**
1847 * update_max_tr_single - only copy one trace over, and reset the rest
1848 * @tr: tracer
1849 * @tsk: task with the latency
1850 * @cpu: the cpu of the buffer to copy.
1851 *
1852 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1853 */
1854 void
1855 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1856 {
1857 int ret;
1858
1859 if (tr->stop_count)
1860 return;
1861
1862 WARN_ON_ONCE(!irqs_disabled());
1863 if (!tr->allocated_snapshot) {
1864 /* Only the nop tracer should hit this when disabling */
1865 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1866 return;
1867 }
1868
1869 arch_spin_lock(&tr->max_lock);
1870
1871 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1872
1873 if (ret == -EBUSY) {
1874 /*
1875 * We failed to swap the buffer due to a commit taking
1876 * place on this CPU. We fail to record, but we reset
1877 * the max trace buffer (no one writes directly to it)
1878 * and flag that it failed.
1879 * The swap can also fail if a resize is in progress.
1880 */
1881 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1882 "Failed to swap buffers due to commit or resize in progress\n");
1883 }
1884
1885 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1886
1887 __update_max_tr(tr, tsk, cpu);
1888 arch_spin_unlock(&tr->max_lock);
1889 }
1890
1891 #endif /* CONFIG_TRACER_MAX_TRACE */
1892
1893 static int wait_on_pipe(struct trace_iterator *iter, int full)
1894 {
1895 int ret;
1896
1897 /* Iterators are static, they should be filled or empty */
1898 if (trace_buffer_iter(iter, iter->cpu_file))
1899 return 0;
1900
1901 ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1902
1903 #ifdef CONFIG_TRACER_MAX_TRACE
1904 /*
1905 * Make sure this is still the snapshot buffer, as if a snapshot were
1906 * to happen, this would now be the main buffer.
1907 */
1908 if (iter->snapshot)
1909 iter->array_buffer = &iter->tr->max_buffer;
1910 #endif
1911 return ret;
1912 }
1913
1914 #ifdef CONFIG_FTRACE_STARTUP_TEST
1915 static bool selftests_can_run;
1916
1917 struct trace_selftests {
1918 struct list_head list;
1919 struct tracer *type;
1920 };
1921
1922 static LIST_HEAD(postponed_selftests);
1923
1924 static int save_selftest(struct tracer *type)
1925 {
1926 struct trace_selftests *selftest;
1927
1928 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1929 if (!selftest)
1930 return -ENOMEM;
1931
1932 selftest->type = type;
1933 list_add(&selftest->list, &postponed_selftests);
1934 return 0;
1935 }
1936
1937 static int run_tracer_selftest(struct tracer *type)
1938 {
1939 struct trace_array *tr = &global_trace;
1940 struct tracer *saved_tracer = tr->current_trace;
1941 int ret;
1942
1943 if (!type->selftest || tracing_selftest_disabled)
1944 return 0;
1945
1946 /*
1947 * If a tracer registers early in boot up (before scheduling is
1948 * initialized and such), then do not run its selftests yet.
1949 * Instead, run it a little later in the boot process.
1950 */
1951 if (!selftests_can_run)
1952 return save_selftest(type);
1953
1954 if (!tracing_is_on()) {
1955 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1956 type->name);
1957 return 0;
1958 }
1959
1960 /*
1961 * Run a selftest on this tracer.
1962 * Here we reset the trace buffer, and set the current
1963 * tracer to be this tracer. The tracer can then run some
1964 * internal tracing to verify that everything is in order.
1965 * If we fail, we do not register this tracer.
1966 */
1967 tracing_reset_online_cpus(&tr->array_buffer);
1968
1969 tr->current_trace = type;
1970
1971 #ifdef CONFIG_TRACER_MAX_TRACE
1972 if (type->use_max_tr) {
1973 /* If we expanded the buffers, make sure the max is expanded too */
1974 if (ring_buffer_expanded)
1975 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1976 RING_BUFFER_ALL_CPUS);
1977 tr->allocated_snapshot = true;
1978 }
1979 #endif
1980
1981 /* the test is responsible for initializing and enabling */
1982 pr_info("Testing tracer %s: ", type->name);
1983 ret = type->selftest(type, tr);
1984 /* the test is responsible for resetting too */
1985 tr->current_trace = saved_tracer;
1986 if (ret) {
1987 printk(KERN_CONT "FAILED!\n");
1988 /* Add the warning after printing 'FAILED' */
1989 WARN_ON(1);
1990 return -1;
1991 }
1992 /* Only reset on passing, to avoid touching corrupted buffers */
1993 tracing_reset_online_cpus(&tr->array_buffer);
1994
1995 #ifdef CONFIG_TRACER_MAX_TRACE
1996 if (type->use_max_tr) {
1997 tr->allocated_snapshot = false;
1998
1999 /* Shrink the max buffer again */
2000 if (ring_buffer_expanded)
2001 ring_buffer_resize(tr->max_buffer.buffer, 1,
2002 RING_BUFFER_ALL_CPUS);
2003 }
2004 #endif
2005
2006 printk(KERN_CONT "PASSED\n");
2007 return 0;
2008 }
2009
2010 static __init int init_trace_selftests(void)
2011 {
2012 struct trace_selftests *p, *n;
2013 struct tracer *t, **last;
2014 int ret;
2015
2016 selftests_can_run = true;
2017
2018 mutex_lock(&trace_types_lock);
2019
2020 if (list_empty(&postponed_selftests))
2021 goto out;
2022
2023 pr_info("Running postponed tracer tests:\n");
2024
2025 tracing_selftest_running = true;
2026 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2027 /* This loop can take minutes when sanitizers are enabled, so
2028 * let's make sure we allow RCU processing.
2029 */
2030 cond_resched();
2031 ret = run_tracer_selftest(p->type);
2032 /* If the test fails, then warn and remove from available_tracers */
2033 if (ret < 0) {
2034 WARN(1, "tracer: %s failed selftest, disabling\n",
2035 p->type->name);
2036 last = &trace_types;
2037 for (t = trace_types; t; t = t->next) {
2038 if (t == p->type) {
2039 *last = t->next;
2040 break;
2041 }
2042 last = &t->next;
2043 }
2044 }
2045 list_del(&p->list);
2046 kfree(p);
2047 }
2048 tracing_selftest_running = false;
2049
2050 out:
2051 mutex_unlock(&trace_types_lock);
2052
2053 return 0;
2054 }
2055 core_initcall(init_trace_selftests);
2056 #else
2057 static inline int run_tracer_selftest(struct tracer *type)
2058 {
2059 return 0;
2060 }
2061 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2062
2063 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2064
2065 static void __init apply_trace_boot_options(void);
2066
2067 /**
2068 * register_tracer - register a tracer with the ftrace system.
2069 * @type: the plugin for the tracer
2070 *
2071 * Register a new plugin tracer.
2072 */
2073 int __init register_tracer(struct tracer *type)
2074 {
2075 struct tracer *t;
2076 int ret = 0;
2077
2078 if (!type->name) {
2079 pr_info("Tracer must have a name\n");
2080 return -1;
2081 }
2082
2083 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2084 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2085 return -1;
2086 }
2087
2088 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2089 pr_warn("Can not register tracer %s due to lockdown\n",
2090 type->name);
2091 return -EPERM;
2092 }
2093
2094 mutex_lock(&trace_types_lock);
2095
2096 tracing_selftest_running = true;
2097
2098 for (t = trace_types; t; t = t->next) {
2099 if (strcmp(type->name, t->name) == 0) {
2100 /* already found */
2101 pr_info("Tracer %s already registered\n",
2102 type->name);
2103 ret = -1;
2104 goto out;
2105 }
2106 }
2107
2108 if (!type->set_flag)
2109 type->set_flag = &dummy_set_flag;
2110 if (!type->flags) {
2111 /* allocate a dummy tracer_flags */
2112 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2113 if (!type->flags) {
2114 ret = -ENOMEM;
2115 goto out;
2116 }
2117 type->flags->val = 0;
2118 type->flags->opts = dummy_tracer_opt;
2119 } else
2120 if (!type->flags->opts)
2121 type->flags->opts = dummy_tracer_opt;
2122
2123 /* store the tracer for __set_tracer_option */
2124 type->flags->trace = type;
2125
2126 ret = run_tracer_selftest(type);
2127 if (ret < 0)
2128 goto out;
2129
2130 type->next = trace_types;
2131 trace_types = type;
2132 add_tracer_options(&global_trace, type);
2133
2134 out:
2135 tracing_selftest_running = false;
2136 mutex_unlock(&trace_types_lock);
2137
2138 if (ret || !default_bootup_tracer)
2139 goto out_unlock;
2140
2141 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2142 goto out_unlock;
2143
2144 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2145 /* Do we want this tracer to start on bootup? */
2146 tracing_set_tracer(&global_trace, type->name);
2147 default_bootup_tracer = NULL;
2148
2149 apply_trace_boot_options();
2150
2151 /* disable other selftests, since running them now would break the just-started tracer. */
2152 disable_tracing_selftest("running a tracer");
2153
2154 out_unlock:
2155 return ret;
2156 }
2157
2158 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2159 {
2160 struct trace_buffer *buffer = buf->buffer;
2161
2162 if (!buffer)
2163 return;
2164
2165 ring_buffer_record_disable(buffer);
2166
2167 /* Make sure all commits have finished */
2168 synchronize_rcu();
2169 ring_buffer_reset_cpu(buffer, cpu);
2170
2171 ring_buffer_record_enable(buffer);
2172 }
2173
2174 void tracing_reset_online_cpus(struct array_buffer *buf)
2175 {
2176 struct trace_buffer *buffer = buf->buffer;
2177
2178 if (!buffer)
2179 return;
2180
2181 ring_buffer_record_disable(buffer);
2182
2183 /* Make sure all commits have finished */
2184 synchronize_rcu();
2185
2186 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2187
2188 ring_buffer_reset_online_cpus(buffer);
2189
2190 ring_buffer_record_enable(buffer);
2191 }
2192
2193 /* Must have trace_types_lock held */
2194 void tracing_reset_all_online_cpus_unlocked(void)
2195 {
2196 struct trace_array *tr;
2197
2198 lockdep_assert_held(&trace_types_lock);
2199
2200 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2201 if (!tr->clear_trace)
2202 continue;
2203 tr->clear_trace = false;
2204 tracing_reset_online_cpus(&tr->array_buffer);
2205 #ifdef CONFIG_TRACER_MAX_TRACE
2206 tracing_reset_online_cpus(&tr->max_buffer);
2207 #endif
2208 }
2209 }
2210
2211 void tracing_reset_all_online_cpus(void)
2212 {
2213 mutex_lock(&trace_types_lock);
2214 tracing_reset_all_online_cpus_unlocked();
2215 mutex_unlock(&trace_types_lock);
2216 }
2217
2218 /*
2219 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2220 * is the tgid last observed corresponding to pid=i.
2221 */
2222 static int *tgid_map;
2223
2224 /* The maximum valid index into tgid_map. */
2225 static size_t tgid_map_max;
2226
2227 #define SAVED_CMDLINES_DEFAULT 128
2228 #define NO_CMDLINE_MAP UINT_MAX
2229 /*
2230 * Preemption must be disabled before acquiring trace_cmdline_lock.
2231 * The various trace_arrays' max_lock must be acquired in a context
2232 * where interrupts are disabled.
2233 */
2234 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2235 struct saved_cmdlines_buffer {
2236 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2237 unsigned *map_cmdline_to_pid;
2238 unsigned cmdline_num;
2239 int cmdline_idx;
2240 char saved_cmdlines[];
2241 };
2242 static struct saved_cmdlines_buffer *savedcmd;
2243
2244 static inline char *get_saved_cmdlines(int idx)
2245 {
2246 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2247 }
2248
2249 static inline void set_cmdline(int idx, const char *cmdline)
2250 {
2251 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2252 }
2253
2254 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
2255 {
2256 int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
2257
2258 kfree(s->map_cmdline_to_pid);
2259 kmemleak_free(s);
2260 free_pages((unsigned long)s, order);
2261 }
2262
2263 static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
2264 {
2265 struct saved_cmdlines_buffer *s;
2266 struct page *page;
2267 int orig_size, size;
2268 int order;
2269
2270 /* Figure out how much is needed to hold the given number of cmdlines */
2271 orig_size = sizeof(*s) + val * TASK_COMM_LEN;
2272 order = get_order(orig_size);
2273 size = 1 << (order + PAGE_SHIFT);
2274 page = alloc_pages(GFP_KERNEL, order);
2275 if (!page)
2276 return NULL;
2277
2278 s = page_address(page);
2279 kmemleak_alloc(s, size, 1, GFP_KERNEL);
2280 memset(s, 0, sizeof(*s));
2281
2282 /* Round up to actual allocation */
2283 val = (size - sizeof(*s)) / TASK_COMM_LEN;
2284 s->cmdline_num = val;
2285
2286 s->map_cmdline_to_pid = kmalloc_array(val,
2287 sizeof(*s->map_cmdline_to_pid),
2288 GFP_KERNEL);
2289 if (!s->map_cmdline_to_pid) {
2290 free_saved_cmdlines_buffer(s);
2291 return NULL;
2292 }
2293
2294 s->cmdline_idx = 0;
2295 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2296 sizeof(s->map_pid_to_cmdline));
2297 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2298 val * sizeof(*s->map_cmdline_to_pid));
2299
2300 return s;
2301 }
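/*
 * Rough sizing example for the rounding above, assuming a 64-bit kernel,
 * 4 KiB pages and PID_MAX_DEFAULT of 0x8000 (assumptions of this comment,
 * not requirements of the code): map_pid_to_cmdline alone is about
 * 128 KiB, so even the default request of 128 cmdlines (~2 KiB of comm
 * strings) lands in an order-6 (256 KiB) allocation. The leftover space
 * is handed back as roughly 8000 extra saved_cmdlines slots rather than
 * being wasted.
 */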
2302
2303 static int trace_create_savedcmd(void)
2304 {
2305 savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
2306
2307 return savedcmd ? 0 : -ENOMEM;
2308 }
2309
2310 int is_tracing_stopped(void)
2311 {
2312 return global_trace.stop_count;
2313 }
2314
2315 static void tracing_start_tr(struct trace_array *tr)
2316 {
2317 struct trace_buffer *buffer;
2318 unsigned long flags;
2319
2320 if (tracing_disabled)
2321 return;
2322
2323 raw_spin_lock_irqsave(&tr->start_lock, flags);
2324 if (--tr->stop_count) {
2325 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2326 /* Someone screwed up their debugging */
2327 tr->stop_count = 0;
2328 }
2329 goto out;
2330 }
2331
2332 /* Prevent the buffers from switching */
2333 arch_spin_lock(&tr->max_lock);
2334
2335 buffer = tr->array_buffer.buffer;
2336 if (buffer)
2337 ring_buffer_record_enable(buffer);
2338
2339 #ifdef CONFIG_TRACER_MAX_TRACE
2340 buffer = tr->max_buffer.buffer;
2341 if (buffer)
2342 ring_buffer_record_enable(buffer);
2343 #endif
2344
2345 arch_spin_unlock(&tr->max_lock);
2346
2347 out:
2348 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2349 }
2350
2351 /**
2352 * tracing_start - quick start of the tracer
2353 *
2354 * If tracing is enabled but was stopped by tracing_stop,
2355 * this will start the tracer back up.
2356 */
2357 void tracing_start(void)
2358
2359 {
2360 return tracing_start_tr(&global_trace);
2361 }
2362
2363 static void tracing_stop_tr(struct trace_array *tr)
2364 {
2365 struct trace_buffer *buffer;
2366 unsigned long flags;
2367
2368 raw_spin_lock_irqsave(&tr->start_lock, flags);
2369 if (tr->stop_count++)
2370 goto out;
2371
2372 /* Prevent the buffers from switching */
2373 arch_spin_lock(&tr->max_lock);
2374
2375 buffer = tr->array_buffer.buffer;
2376 if (buffer)
2377 ring_buffer_record_disable(buffer);
2378
2379 #ifdef CONFIG_TRACER_MAX_TRACE
2380 buffer = tr->max_buffer.buffer;
2381 if (buffer)
2382 ring_buffer_record_disable(buffer);
2383 #endif
2384
2385 arch_spin_unlock(&tr->max_lock);
2386
2387 out:
2388 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2389 }
2390
2391 /**
2392 * tracing_stop - quick stop of the tracer
2393 *
2394 * Light weight way to stop tracing. Use in conjunction with
2395 * tracing_start.
2396 */
2397 void tracing_stop(void)
2398 {
2399 return tracing_stop_tr(&global_trace);
2400 }
2401
2402 static int trace_save_cmdline(struct task_struct *tsk)
2403 {
2404 unsigned tpid, idx;
2405
2406 /* treat recording of idle task as a success */
2407 if (!tsk->pid)
2408 return 1;
2409
2410 tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2411
2412 /*
2413 * It's not the end of the world if we don't get
2414 * the lock, but we also don't want to spin
2415 * nor do we want to disable interrupts,
2416 * so if we miss here, then better luck next time.
2417 *
2418 * This is called from within the scheduler and the wakeup path, so
2419 * interrupts had better be disabled and the run queue lock held.
2420 */
2421 lockdep_assert_preemption_disabled();
2422 if (!arch_spin_trylock(&trace_cmdline_lock))
2423 return 0;
2424
2425 idx = savedcmd->map_pid_to_cmdline[tpid];
2426 if (idx == NO_CMDLINE_MAP) {
2427 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2428
2429 savedcmd->map_pid_to_cmdline[tpid] = idx;
2430 savedcmd->cmdline_idx = idx;
2431 }
2432
2433 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2434 set_cmdline(idx, tsk->comm);
2435
2436 arch_spin_unlock(&trace_cmdline_lock);
2437
2438 return 1;
2439 }
2440
2441 static void __trace_find_cmdline(int pid, char comm[])
2442 {
2443 unsigned map;
2444 int tpid;
2445
2446 if (!pid) {
2447 strcpy(comm, "<idle>");
2448 return;
2449 }
2450
2451 if (WARN_ON_ONCE(pid < 0)) {
2452 strcpy(comm, "<XXX>");
2453 return;
2454 }
2455
2456 tpid = pid & (PID_MAX_DEFAULT - 1);
2457 map = savedcmd->map_pid_to_cmdline[tpid];
2458 if (map != NO_CMDLINE_MAP) {
2459 tpid = savedcmd->map_cmdline_to_pid[map];
2460 if (tpid == pid) {
2461 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2462 return;
2463 }
2464 }
2465 strcpy(comm, "<...>");
2466 }
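/*
 * Worked example of the masking above, assuming PID_MAX_DEFAULT is
 * 0x8000: pids 7232 and 40000 both map to slot 7232 (40000 & 0x7fff ==
 * 7232). Whichever task went through trace_save_cmdline() last owns the
 * slot, and a lookup for the other pid fails the map_cmdline_to_pid
 * check and falls back to "<...>".
 */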
2467
2468 void trace_find_cmdline(int pid, char comm[])
2469 {
2470 preempt_disable();
2471 arch_spin_lock(&trace_cmdline_lock);
2472
2473 __trace_find_cmdline(pid, comm);
2474
2475 arch_spin_unlock(&trace_cmdline_lock);
2476 preempt_enable();
2477 }
2478
2479 static int *trace_find_tgid_ptr(int pid)
2480 {
2481 /*
2482 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2483 * if we observe a non-NULL tgid_map then we also observe the correct
2484 * tgid_map_max.
2485 */
2486 int *map = smp_load_acquire(&tgid_map);
2487
2488 if (unlikely(!map || pid > tgid_map_max))
2489 return NULL;
2490
2491 return &map[pid];
2492 }
2493
2494 int trace_find_tgid(int pid)
2495 {
2496 int *ptr = trace_find_tgid_ptr(pid);
2497
2498 return ptr ? *ptr : 0;
2499 }
2500
2501 static int trace_save_tgid(struct task_struct *tsk)
2502 {
2503 int *ptr;
2504
2505 /* treat recording of idle task as a success */
2506 if (!tsk->pid)
2507 return 1;
2508
2509 ptr = trace_find_tgid_ptr(tsk->pid);
2510 if (!ptr)
2511 return 0;
2512
2513 *ptr = tsk->tgid;
2514 return 1;
2515 }
2516
2517 static bool tracing_record_taskinfo_skip(int flags)
2518 {
2519 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2520 return true;
2521 if (!__this_cpu_read(trace_taskinfo_save))
2522 return true;
2523 return false;
2524 }
2525
2526 /**
2527 * tracing_record_taskinfo - record the task info of a task
2528 *
2529 * @task: task to record
2530 * @flags: TRACE_RECORD_CMDLINE for recording comm
2531 * TRACE_RECORD_TGID for recording tgid
2532 */
2533 void tracing_record_taskinfo(struct task_struct *task, int flags)
2534 {
2535 bool done;
2536
2537 if (tracing_record_taskinfo_skip(flags))
2538 return;
2539
2540 /*
2541 * Record as much task information as possible. If some fail, continue
2542 * to try to record the others.
2543 */
2544 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2545 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2546
2547 /* If recording any information failed, retry again soon. */
2548 if (!done)
2549 return;
2550
2551 __this_cpu_write(trace_taskinfo_save, false);
2552 }
2553
2554 /**
2555 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2556 *
2557 * @prev: previous task during sched_switch
2558 * @next: next task during sched_switch
2559 * @flags: TRACE_RECORD_CMDLINE for recording comm
2560 * TRACE_RECORD_TGID for recording tgid
2561 */
2562 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2563 struct task_struct *next, int flags)
2564 {
2565 bool done;
2566
2567 if (tracing_record_taskinfo_skip(flags))
2568 return;
2569
2570 /*
2571 * Record as much task information as possible. If some fail, continue
2572 * to try to record the others.
2573 */
2574 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2575 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2576 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2577 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2578
2579 /* If recording any information failed, retry again soon. */
2580 if (!done)
2581 return;
2582
2583 __this_cpu_write(trace_taskinfo_save, false);
2584 }
2585
2586 /* Helpers to record a specific task information */
2587 void tracing_record_cmdline(struct task_struct *task)
2588 {
2589 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2590 }
2591
2592 void tracing_record_tgid(struct task_struct *task)
2593 {
2594 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2595 }
2596
2597 /*
2598 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2599 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2600 * simplifies those functions and keeps them in sync.
2601 */
2602 enum print_line_t trace_handle_return(struct trace_seq *s)
2603 {
2604 return trace_seq_has_overflowed(s) ?
2605 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2606 }
2607 EXPORT_SYMBOL_GPL(trace_handle_return);
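/*
 * Typical use in an event's print handler (a sketch; "foo" is a made-up
 * event, not one defined in this file):
 *
 *	static enum print_line_t trace_foo_print(struct trace_iterator *iter,
 *						 int flags,
 *						 struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "foo: %d\n", 42);
 *		return trace_handle_return(&iter->seq);
 *	}
 *
 * so that every handler reports a partial line the same way when the
 * seq buffer overflows.
 */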
2608
2609 static unsigned short migration_disable_value(void)
2610 {
2611 #if defined(CONFIG_SMP)
2612 return current->migration_disabled;
2613 #else
2614 return 0;
2615 #endif
2616 }
2617
2618 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2619 {
2620 unsigned int trace_flags = irqs_status;
2621 unsigned int pc;
2622
2623 pc = preempt_count();
2624
2625 if (pc & NMI_MASK)
2626 trace_flags |= TRACE_FLAG_NMI;
2627 if (pc & HARDIRQ_MASK)
2628 trace_flags |= TRACE_FLAG_HARDIRQ;
2629 if (in_serving_softirq())
2630 trace_flags |= TRACE_FLAG_SOFTIRQ;
2631
2632 if (tif_need_resched())
2633 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2634 if (test_preempt_need_resched())
2635 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2636 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2637 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2638 }
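/*
 * Layout of the value built above (the entry setup code later splits it
 * back into entry->flags and entry->preempt_count):
 *
 *	bits  0.. 3	preemption count, clamped to 15
 *	bits  4.. 7	migration-disable depth, clamped to 15
 *	bits 16..	the TRACE_FLAG_* bits (irq state passed in, plus
 *			NMI/hardirq/softirq and need-resched state)
 *
 * For example, a softirq running with a preempt disable depth of 1,
 * interrupts enabled and no migration disable yields
 * (TRACE_FLAG_SOFTIRQ << 16) | 1.
 */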
2639
2640 struct ring_buffer_event *
2641 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2642 int type,
2643 unsigned long len,
2644 unsigned int trace_ctx)
2645 {
2646 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2647 }
2648
2649 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2650 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2651 static int trace_buffered_event_ref;
2652
2653 /**
2654 * trace_buffered_event_enable - enable buffering events
2655 *
2656 * When events are being filtered, it is quicker to use a temporary
2657 * buffer to write the event data into if there's a likely chance
2658 * that it will not be committed. The discard of the ring buffer
2659 * is not as fast as committing, and is much slower than copying
2660 * a commit.
2661 *
2662 * When an event is to be filtered, allocate per cpu buffers to
2663 * write the event data into, and if the event is filtered and discarded
2664 * it is simply dropped, otherwise, the entire data is to be committed
2665 * in one shot.
2666 */
2667 void trace_buffered_event_enable(void)
2668 {
2669 struct ring_buffer_event *event;
2670 struct page *page;
2671 int cpu;
2672
2673 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2674
2675 if (trace_buffered_event_ref++)
2676 return;
2677
2678 for_each_tracing_cpu(cpu) {
2679 page = alloc_pages_node(cpu_to_node(cpu),
2680 GFP_KERNEL | __GFP_NORETRY, 0);
2681 /* This is just an optimization and can handle failures */
2682 if (!page) {
2683 pr_err("Failed to allocate event buffer\n");
2684 break;
2685 }
2686
2687 event = page_address(page);
2688 memset(event, 0, sizeof(*event));
2689
2690 per_cpu(trace_buffered_event, cpu) = event;
2691
2692 preempt_disable();
2693 if (cpu == smp_processor_id() &&
2694 __this_cpu_read(trace_buffered_event) !=
2695 per_cpu(trace_buffered_event, cpu))
2696 WARN_ON_ONCE(1);
2697 preempt_enable();
2698 }
2699 }
2700
2701 static void enable_trace_buffered_event(void *data)
2702 {
2703 /* Probably not needed, but do it anyway */
2704 smp_rmb();
2705 this_cpu_dec(trace_buffered_event_cnt);
2706 }
2707
2708 static void disable_trace_buffered_event(void *data)
2709 {
2710 this_cpu_inc(trace_buffered_event_cnt);
2711 }
2712
2713 /**
2714 * trace_buffered_event_disable - disable buffering events
2715 *
2716 * When a filter is removed, it is faster to not use the buffered
2717 * events, and to commit directly into the ring buffer. Free up
2718 * the temp buffers when there are no more users. This requires
2719 * special synchronization with current events.
2720 */
2721 void trace_buffered_event_disable(void)
2722 {
2723 int cpu;
2724
2725 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2726
2727 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2728 return;
2729
2730 if (--trace_buffered_event_ref)
2731 return;
2732
2733 /* For each CPU, set the buffer as used. */
2734 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2735 NULL, true);
2736
2737 /* Wait for all current users to finish */
2738 synchronize_rcu();
2739
2740 for_each_tracing_cpu(cpu) {
2741 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2742 per_cpu(trace_buffered_event, cpu) = NULL;
2743 }
2744
2745 /*
2746 * Wait for all CPUs that potentially started checking if they can use
2747 * their event buffer only after the previous synchronize_rcu() call and
2748 * they still read a valid pointer from trace_buffered_event. They must
2749 * not see a cleared trace_buffered_event_cnt, else they could wrongly
2750 * decide to use the pointed-to buffer, which is now freed.
2751 */
2752 synchronize_rcu();
2753
2754 /* For each CPU, relinquish the buffer */
2755 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2756 true);
2757 }
2758
2759 static struct trace_buffer *temp_buffer;
2760
2761 struct ring_buffer_event *
2762 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2763 struct trace_event_file *trace_file,
2764 int type, unsigned long len,
2765 unsigned int trace_ctx)
2766 {
2767 struct ring_buffer_event *entry;
2768 struct trace_array *tr = trace_file->tr;
2769 int val;
2770
2771 *current_rb = tr->array_buffer.buffer;
2772
2773 if (!tr->no_filter_buffering_ref &&
2774 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2775 (entry = this_cpu_read(trace_buffered_event))) {
2776 /*
2777 * Filtering is on, so try to use the per cpu buffer first.
2778 * This buffer will simulate a ring_buffer_event,
2779 * where the type_len is zero and the array[0] will
2780 * hold the full length.
2781 * (see include/linux/ring_buffer.h for details on
2782 * how the ring_buffer_event is structured).
2783 *
2784 * Using a temp buffer during filtering and copying it
2785 * on a matched filter is quicker than writing directly
2786 * into the ring buffer and then discarding it when
2787 * it doesn't match. That is because the discard
2788 * requires several atomic operations to get right.
2789 * Copying on match and doing nothing on a failed match
2790 * is still quicker than no copy on match, but having
2791 * to discard out of the ring buffer on a failed match.
2792 */
2793 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2794
2795 val = this_cpu_inc_return(trace_buffered_event_cnt);
2796
2797 /*
2798 * Preemption is disabled, but interrupts and NMIs
2799 * can still come in now. If that happens after
2800 * the above increment, then it will have to go
2801 * back to the old method of allocating the event
2802 * on the ring buffer, and if the filter fails, it
2803 * will have to call ring_buffer_discard_commit()
2804 * to remove it.
2805 *
2806 * Need to also check the unlikely case that the
2807 * length is bigger than the temp buffer size.
2808 * If that happens, then the reserve is pretty much
2809 * guaranteed to fail, as the ring buffer currently
2810 * only allows events less than a page. But that may
2811 * change in the future, so let the ring buffer reserve
2812 * handle the failure in that case.
2813 */
2814 if (val == 1 && likely(len <= max_len)) {
2815 trace_event_setup(entry, type, trace_ctx);
2816 entry->array[0] = len;
2817 return entry;
2818 }
2819 this_cpu_dec(trace_buffered_event_cnt);
2820 }
2821
2822 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2823 trace_ctx);
2824 /*
2825 * If tracing is off, but we have triggers enabled
2826 * we still need to look at the event data. Use the temp_buffer
2827 * to store the trace event for the trigger to use. It's recursion
2828 * safe and will not be recorded anywhere.
2829 */
2830 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2831 *current_rb = temp_buffer;
2832 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2833 trace_ctx);
2834 }
2835 return entry;
2836 }
2837 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2838
2839 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2840 static DEFINE_MUTEX(tracepoint_printk_mutex);
2841
2842 static void output_printk(struct trace_event_buffer *fbuffer)
2843 {
2844 struct trace_event_call *event_call;
2845 struct trace_event_file *file;
2846 struct trace_event *event;
2847 unsigned long flags;
2848 struct trace_iterator *iter = tracepoint_print_iter;
2849
2850 /* We should never get here if iter is NULL */
2851 if (WARN_ON_ONCE(!iter))
2852 return;
2853
2854 event_call = fbuffer->trace_file->event_call;
2855 if (!event_call || !event_call->event.funcs ||
2856 !event_call->event.funcs->trace)
2857 return;
2858
2859 file = fbuffer->trace_file;
2860 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2861 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2862 !filter_match_preds(file->filter, fbuffer->entry)))
2863 return;
2864
2865 event = &fbuffer->trace_file->event_call->event;
2866
2867 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2868 trace_seq_init(&iter->seq);
2869 iter->ent = fbuffer->entry;
2870 event_call->event.funcs->trace(iter, 0, event);
2871 trace_seq_putc(&iter->seq, 0);
2872 printk("%s", iter->seq.buffer);
2873
2874 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2875 }
2876
2877 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2878 void *buffer, size_t *lenp,
2879 loff_t *ppos)
2880 {
2881 int save_tracepoint_printk;
2882 int ret;
2883
2884 mutex_lock(&tracepoint_printk_mutex);
2885 save_tracepoint_printk = tracepoint_printk;
2886
2887 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2888
2889 /*
2890 * This will force exiting early, as tracepoint_printk
2891 * is always zero when tracepoint_print_iter is not allocated.
2892 */
2893 if (!tracepoint_print_iter)
2894 tracepoint_printk = 0;
2895
2896 if (save_tracepoint_printk == tracepoint_printk)
2897 goto out;
2898
2899 if (tracepoint_printk)
2900 static_key_enable(&tracepoint_printk_key.key);
2901 else
2902 static_key_disable(&tracepoint_printk_key.key);
2903
2904 out:
2905 mutex_unlock(&tracepoint_printk_mutex);
2906
2907 return ret;
2908 }
2909
2910 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2911 {
2912 enum event_trigger_type tt = ETT_NONE;
2913 struct trace_event_file *file = fbuffer->trace_file;
2914
2915 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2916 fbuffer->entry, &tt))
2917 goto discard;
2918
2919 if (static_key_false(&tracepoint_printk_key.key))
2920 output_printk(fbuffer);
2921
2922 if (static_branch_unlikely(&trace_event_exports_enabled))
2923 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2924
2925 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2926 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2927
2928 discard:
2929 if (tt)
2930 event_triggers_post_call(file, tt);
2931
2932 }
2933 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2934
2935 /*
2936 * Skip 3:
2937 *
2938 * trace_buffer_unlock_commit_regs()
2939 * trace_event_buffer_commit()
2940 * trace_event_raw_event_xxx()
2941 */
2942 # define STACK_SKIP 3
2943
2944 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2945 struct trace_buffer *buffer,
2946 struct ring_buffer_event *event,
2947 unsigned int trace_ctx,
2948 struct pt_regs *regs)
2949 {
2950 __buffer_unlock_commit(buffer, event);
2951
2952 /*
2953 * If regs is not set, then skip the necessary functions.
2954 * Note, we can still get here via blktrace, wakeup tracer
2955 * and mmiotrace, but that's ok if they lose a function or
2956 * two. They are not that meaningful.
2957 */
2958 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2959 ftrace_trace_userstack(tr, buffer, trace_ctx);
2960 }
2961
2962 /*
2963 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2964 */
2965 void
2966 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2967 struct ring_buffer_event *event)
2968 {
2969 __buffer_unlock_commit(buffer, event);
2970 }
2971
2972 void
2973 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2974 parent_ip, unsigned int trace_ctx)
2975 {
2976 struct trace_event_call *call = &event_function;
2977 struct trace_buffer *buffer = tr->array_buffer.buffer;
2978 struct ring_buffer_event *event;
2979 struct ftrace_entry *entry;
2980
2981 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2982 trace_ctx);
2983 if (!event)
2984 return;
2985 entry = ring_buffer_event_data(event);
2986 entry->ip = ip;
2987 entry->parent_ip = parent_ip;
2988
2989 if (!call_filter_check_discard(call, entry, buffer, event)) {
2990 if (static_branch_unlikely(&trace_function_exports_enabled))
2991 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2992 __buffer_unlock_commit(buffer, event);
2993 }
2994 }
2995
2996 #ifdef CONFIG_STACKTRACE
2997
2998 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2999 #define FTRACE_KSTACK_NESTING 4
3000
3001 #define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3002
3003 struct ftrace_stack {
3004 unsigned long calls[FTRACE_KSTACK_ENTRIES];
3005 };
3006
3007
3008 struct ftrace_stacks {
3009 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
3010 };
3011
3012 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3013 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
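/*
 * Size check for the above, assuming 4 KiB pages and 64-bit longs (both
 * assumptions of this comment only): FTRACE_KSTACK_ENTRIES is 1024, so a
 * single ftrace_stack is 8 KiB and the four nesting levels cost 32 KiB
 * of per-CPU memory for kernel stack tracing.
 */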
3014
3015 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3016 unsigned int trace_ctx,
3017 int skip, struct pt_regs *regs)
3018 {
3019 struct trace_event_call *call = &event_kernel_stack;
3020 struct ring_buffer_event *event;
3021 unsigned int size, nr_entries;
3022 struct ftrace_stack *fstack;
3023 struct stack_entry *entry;
3024 int stackidx;
3025
3026 /*
3027 * Add one, for this function and the call to stack_trace_save().
3028 * If regs is set, then these functions will not be in the way.
3029 */
3030 #ifndef CONFIG_UNWINDER_ORC
3031 if (!regs)
3032 skip++;
3033 #endif
3034
3035 preempt_disable_notrace();
3036
3037 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3038
3039 /* This should never happen. If it does, yell once and skip */
3040 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3041 goto out;
3042
3043 /*
3044 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3045 * interrupt will either see the value pre increment or post
3046 * increment. If the interrupt happens pre increment it will have
3047 * restored the counter when it returns. We just need a barrier to
3048 * keep gcc from moving things around.
3049 */
3050 barrier();
3051
3052 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3053 size = ARRAY_SIZE(fstack->calls);
3054
3055 if (regs) {
3056 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3057 size, skip);
3058 } else {
3059 nr_entries = stack_trace_save(fstack->calls, size, skip);
3060 }
3061
3062 size = nr_entries * sizeof(unsigned long);
3063 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3064 (sizeof(*entry) - sizeof(entry->caller)) + size,
3065 trace_ctx);
3066 if (!event)
3067 goto out;
3068 entry = ring_buffer_event_data(event);
3069
3070 memcpy(&entry->caller, fstack->calls, size);
3071 entry->size = nr_entries;
3072
3073 if (!call_filter_check_discard(call, entry, buffer, event))
3074 __buffer_unlock_commit(buffer, event);
3075
3076 out:
3077 /* Again, don't let gcc optimize things here */
3078 barrier();
3079 __this_cpu_dec(ftrace_stack_reserve);
3080 preempt_enable_notrace();
3081
3082 }
3083
3084 static inline void ftrace_trace_stack(struct trace_array *tr,
3085 struct trace_buffer *buffer,
3086 unsigned int trace_ctx,
3087 int skip, struct pt_regs *regs)
3088 {
3089 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3090 return;
3091
3092 __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3093 }
3094
3095 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3096 int skip)
3097 {
3098 struct trace_buffer *buffer = tr->array_buffer.buffer;
3099
3100 if (rcu_is_watching()) {
3101 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3102 return;
3103 }
3104
3105 /*
3106 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3107 * but if the above rcu_is_watching() failed, then the NMI
3108 * triggered someplace critical, and rcu_irq_enter() should
3109 * not be called from NMI.
3110 */
3111 if (unlikely(in_nmi()))
3112 return;
3113
3114 rcu_irq_enter_irqson();
3115 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3116 rcu_irq_exit_irqson();
3117 }
3118
3119 /**
3120 * trace_dump_stack - record a stack back trace in the trace buffer
3121 * @skip: Number of functions to skip (helper handlers)
3122 */
3123 void trace_dump_stack(int skip)
3124 {
3125 if (tracing_disabled || tracing_selftest_running)
3126 return;
3127
3128 #ifndef CONFIG_UNWINDER_ORC
3129 /* Skip 1 to skip this function. */
3130 skip++;
3131 #endif
3132 __ftrace_trace_stack(global_trace.array_buffer.buffer,
3133 tracing_gen_ctx(), skip, NULL);
3134 }
3135 EXPORT_SYMBOL_GPL(trace_dump_stack);
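/*
 * Example use from other kernel code while debugging (the condition
 * shown is hypothetical):
 *
 *	if (unexpected_condition)
 *		trace_dump_stack(0);
 *
 * which records the caller's backtrace into the top level ring buffer
 * instead of printing it to the console.
 */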
3136
3137 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3138 static DEFINE_PER_CPU(int, user_stack_count);
3139
3140 static void
3141 ftrace_trace_userstack(struct trace_array *tr,
3142 struct trace_buffer *buffer, unsigned int trace_ctx)
3143 {
3144 struct trace_event_call *call = &event_user_stack;
3145 struct ring_buffer_event *event;
3146 struct userstack_entry *entry;
3147
3148 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3149 return;
3150
3151 /*
3152 * NMIs cannot handle page faults, even with fixups.
3153 * Saving the user stack can (and often does) fault.
3154 */
3155 if (unlikely(in_nmi()))
3156 return;
3157
3158 /*
3159 * prevent recursion, since the user stack tracing may
3160 * trigger other kernel events.
3161 */
3162 preempt_disable();
3163 if (__this_cpu_read(user_stack_count))
3164 goto out;
3165
3166 __this_cpu_inc(user_stack_count);
3167
3168 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3169 sizeof(*entry), trace_ctx);
3170 if (!event)
3171 goto out_drop_count;
3172 entry = ring_buffer_event_data(event);
3173
3174 entry->tgid = current->tgid;
3175 memset(&entry->caller, 0, sizeof(entry->caller));
3176
3177 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3178 if (!call_filter_check_discard(call, entry, buffer, event))
3179 __buffer_unlock_commit(buffer, event);
3180
3181 out_drop_count:
3182 __this_cpu_dec(user_stack_count);
3183 out:
3184 preempt_enable();
3185 }
3186 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3187 static void ftrace_trace_userstack(struct trace_array *tr,
3188 struct trace_buffer *buffer,
3189 unsigned int trace_ctx)
3190 {
3191 }
3192 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3193
3194 #endif /* CONFIG_STACKTRACE */
3195
3196 static inline void
3197 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3198 unsigned long long delta)
3199 {
3200 entry->bottom_delta_ts = delta & U32_MAX;
3201 entry->top_delta_ts = (delta >> 32);
3202 }
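/*
 * Split example for the helper above: a delta of 0x100000002 ns is
 * stored as bottom_delta_ts == 2 and top_delta_ts == 1; the output code
 * reassembles it as (top << 32) | bottom.
 */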
3203
3204 void trace_last_func_repeats(struct trace_array *tr,
3205 struct trace_func_repeats *last_info,
3206 unsigned int trace_ctx)
3207 {
3208 struct trace_buffer *buffer = tr->array_buffer.buffer;
3209 struct func_repeats_entry *entry;
3210 struct ring_buffer_event *event;
3211 u64 delta;
3212
3213 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3214 sizeof(*entry), trace_ctx);
3215 if (!event)
3216 return;
3217
3218 delta = ring_buffer_event_time_stamp(buffer, event) -
3219 last_info->ts_last_call;
3220
3221 entry = ring_buffer_event_data(event);
3222 entry->ip = last_info->ip;
3223 entry->parent_ip = last_info->parent_ip;
3224 entry->count = last_info->count;
3225 func_repeats_set_delta_ts(entry, delta);
3226
3227 __buffer_unlock_commit(buffer, event);
3228 }
3229
3230 /* created for use with alloc_percpu */
3231 struct trace_buffer_struct {
3232 int nesting;
3233 char buffer[4][TRACE_BUF_SIZE];
3234 };
3235
3236 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3237
3238 /*
3239 * This allows for lockless recording. If we're nested too deeply, then
3240 * this returns NULL.
3241 */
3242 static char *get_trace_buf(void)
3243 {
3244 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3245
3246 if (!trace_percpu_buffer || buffer->nesting >= 4)
3247 return NULL;
3248
3249 buffer->nesting++;
3250
3251 /* Interrupts must see nesting incremented before we use the buffer */
3252 barrier();
3253 return &buffer->buffer[buffer->nesting - 1][0];
3254 }
3255
3256 static void put_trace_buf(void)
3257 {
3258 /* Don't let the decrement of nesting leak before this */
3259 barrier();
3260 this_cpu_dec(trace_percpu_buffer->nesting);
3261 }
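/*
 * The nesting counter is what makes these per-CPU buffers safe without a
 * lock: a trace_printk() in task context interrupted by an irq that also
 * calls trace_printk(), which is in turn hit by an NMI doing the same,
 * uses buffer[0], buffer[1] and buffer[2] respectively, and each context
 * decrements nesting on its way out. Only a fifth level of nesting
 * (which should not happen) makes get_trace_buf() return NULL.
 */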
3262
3263 static int alloc_percpu_trace_buffer(void)
3264 {
3265 struct trace_buffer_struct __percpu *buffers;
3266
3267 if (trace_percpu_buffer)
3268 return 0;
3269
3270 buffers = alloc_percpu(struct trace_buffer_struct);
3271 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3272 return -ENOMEM;
3273
3274 trace_percpu_buffer = buffers;
3275 return 0;
3276 }
3277
3278 static int buffers_allocated;
3279
3280 void trace_printk_init_buffers(void)
3281 {
3282 if (buffers_allocated)
3283 return;
3284
3285 if (alloc_percpu_trace_buffer())
3286 return;
3287
3288 /* trace_printk() is for debug use only. Don't use it in production. */
3289
3290 pr_warn("\n");
3291 pr_warn("**********************************************************\n");
3292 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3293 pr_warn("** **\n");
3294 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3295 pr_warn("** **\n");
3296 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3297 pr_warn("** unsafe for production use. **\n");
3298 pr_warn("** **\n");
3299 pr_warn("** If you see this message and you are not debugging **\n");
3300 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3301 pr_warn("** **\n");
3302 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3303 pr_warn("**********************************************************\n");
3304
3305 /* Expand the buffers to set size */
3306 tracing_update_buffers();
3307
3308 buffers_allocated = 1;
3309
3310 /*
3311 * trace_printk_init_buffers() can be called by modules.
3312 * If that happens, then we need to start cmdline recording
3313 * directly here. If the global_trace.buffer is already
3314 * allocated here, then this was called by module code.
3315 */
3316 if (global_trace.array_buffer.buffer)
3317 tracing_start_cmdline_record();
3318 }
3319 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3320
3321 void trace_printk_start_comm(void)
3322 {
3323 /* Start tracing comms if trace printk is set */
3324 if (!buffers_allocated)
3325 return;
3326 tracing_start_cmdline_record();
3327 }
3328
3329 static void trace_printk_start_stop_comm(int enabled)
3330 {
3331 if (!buffers_allocated)
3332 return;
3333
3334 if (enabled)
3335 tracing_start_cmdline_record();
3336 else
3337 tracing_stop_cmdline_record();
3338 }
3339
3340 /**
3341 * trace_vbprintk - write binary msg to tracing buffer
3342 * @ip: The address of the caller
3343 * @fmt: The string format to write to the buffer
3344 * @args: Arguments for @fmt
3345 */
3346 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3347 {
3348 struct trace_event_call *call = &event_bprint;
3349 struct ring_buffer_event *event;
3350 struct trace_buffer *buffer;
3351 struct trace_array *tr = &global_trace;
3352 struct bprint_entry *entry;
3353 unsigned int trace_ctx;
3354 char *tbuffer;
3355 int len = 0, size;
3356
3357 if (unlikely(tracing_selftest_running || tracing_disabled))
3358 return 0;
3359
3360 /* Don't pollute graph traces with trace_vprintk internals */
3361 pause_graph_tracing();
3362
3363 trace_ctx = tracing_gen_ctx();
3364 preempt_disable_notrace();
3365
3366 tbuffer = get_trace_buf();
3367 if (!tbuffer) {
3368 len = 0;
3369 goto out_nobuffer;
3370 }
3371
3372 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3373
3374 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3375 goto out_put;
3376
3377 size = sizeof(*entry) + sizeof(u32) * len;
3378 buffer = tr->array_buffer.buffer;
3379 ring_buffer_nest_start(buffer);
3380 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3381 trace_ctx);
3382 if (!event)
3383 goto out;
3384 entry = ring_buffer_event_data(event);
3385 entry->ip = ip;
3386 entry->fmt = fmt;
3387
3388 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3389 if (!call_filter_check_discard(call, entry, buffer, event)) {
3390 __buffer_unlock_commit(buffer, event);
3391 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3392 }
3393
3394 out:
3395 ring_buffer_nest_end(buffer);
3396 out_put:
3397 put_trace_buf();
3398
3399 out_nobuffer:
3400 preempt_enable_notrace();
3401 unpause_graph_tracing();
3402
3403 return len;
3404 }
3405 EXPORT_SYMBOL_GPL(trace_vbprintk);
3406
3407 __printf(3, 0)
3408 static int
3409 __trace_array_vprintk(struct trace_buffer *buffer,
3410 unsigned long ip, const char *fmt, va_list args)
3411 {
3412 struct trace_event_call *call = &event_print;
3413 struct ring_buffer_event *event;
3414 int len = 0, size;
3415 struct print_entry *entry;
3416 unsigned int trace_ctx;
3417 char *tbuffer;
3418
3419 if (tracing_disabled || tracing_selftest_running)
3420 return 0;
3421
3422 /* Don't pollute graph traces with trace_vprintk internals */
3423 pause_graph_tracing();
3424
3425 trace_ctx = tracing_gen_ctx();
3426 preempt_disable_notrace();
3427
3428
3429 tbuffer = get_trace_buf();
3430 if (!tbuffer) {
3431 len = 0;
3432 goto out_nobuffer;
3433 }
3434
3435 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3436
3437 size = sizeof(*entry) + len + 1;
3438 ring_buffer_nest_start(buffer);
3439 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3440 trace_ctx);
3441 if (!event)
3442 goto out;
3443 entry = ring_buffer_event_data(event);
3444 entry->ip = ip;
3445
3446 memcpy(&entry->buf, tbuffer, len + 1);
3447 if (!call_filter_check_discard(call, entry, buffer, event)) {
3448 __buffer_unlock_commit(buffer, event);
3449 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3450 }
3451
3452 out:
3453 ring_buffer_nest_end(buffer);
3454 put_trace_buf();
3455
3456 out_nobuffer:
3457 preempt_enable_notrace();
3458 unpause_graph_tracing();
3459
3460 return len;
3461 }
3462
3463 __printf(3, 0)
3464 int trace_array_vprintk(struct trace_array *tr,
3465 unsigned long ip, const char *fmt, va_list args)
3466 {
3467 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3468 }
3469
3470 /**
3471 * trace_array_printk - Print a message to a specific instance
3472 * @tr: The instance trace_array descriptor
3473 * @ip: The instruction pointer that this is called from.
3474 * @fmt: The format to print (printf format)
3475 *
3476 * If a subsystem sets up its own instance, they have the right to
3477 * printk strings into their tracing instance buffer using this
3478 * function. Note, this function will not write into the top level
3479 * buffer (use trace_printk() for that), as writing into the top level
3480 * buffer should only have events that can be individually disabled.
3481 * trace_printk() is only used for debugging a kernel, and should never
3482 * be incorporated in normal use.
3483 *
3484 * trace_array_printk() can be used, as it will not add noise to the
3485 * top level tracing buffer.
3486 *
3487 * Note, trace_array_init_printk() must be called on @tr before this
3488 * can be used.
3489 */
3490 __printf(3, 0)
3491 int trace_array_printk(struct trace_array *tr,
3492 unsigned long ip, const char *fmt, ...)
3493 {
3494 int ret;
3495 va_list ap;
3496
3497 if (!tr)
3498 return -ENOENT;
3499
3500 /* This is only allowed for created instances */
3501 if (tr == &global_trace)
3502 return 0;
3503
3504 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3505 return 0;
3506
3507 va_start(ap, fmt);
3508 ret = trace_array_vprintk(tr, ip, fmt, ap);
3509 va_end(ap);
3510 return ret;
3511 }
3512 EXPORT_SYMBOL_GPL(trace_array_printk);
3513
3514 /**
3515 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3516 * @tr: The trace array to initialize the buffers for
3517 *
3518 * As trace_array_printk() only writes into instances, such calls are OK
3519 * to leave in the kernel (unlike trace_printk()). This needs to be called
3520 * before trace_array_printk() can be used on a trace_array.
3521 */
3522 int trace_array_init_printk(struct trace_array *tr)
3523 {
3524 if (!tr)
3525 return -ENOENT;
3526
3527 /* This is only allowed for created instances */
3528 if (tr == &global_trace)
3529 return -EINVAL;
3530
3531 return alloc_percpu_trace_buffer();
3532 }
3533 EXPORT_SYMBOL_GPL(trace_array_init_printk);
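/*
 * Putting the two together, a subsystem with its own instance might do
 * something like the following (a sketch only; the instance name is made
 * up, and error handling is omitted):
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "hello %d\n", 42);
 *
 * Output goes only to that instance's buffer, never to the top level
 * trace buffer.
 */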
3534
3535 __printf(3, 4)
3536 int trace_array_printk_buf(struct trace_buffer *buffer,
3537 unsigned long ip, const char *fmt, ...)
3538 {
3539 int ret;
3540 va_list ap;
3541
3542 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3543 return 0;
3544
3545 va_start(ap, fmt);
3546 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3547 va_end(ap);
3548 return ret;
3549 }
3550
3551 __printf(2, 0)
3552 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3553 {
3554 return trace_array_vprintk(&global_trace, ip, fmt, args);
3555 }
3556 EXPORT_SYMBOL_GPL(trace_vprintk);
3557
3558 static void trace_iterator_increment(struct trace_iterator *iter)
3559 {
3560 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3561
3562 iter->idx++;
3563 if (buf_iter)
3564 ring_buffer_iter_advance(buf_iter);
3565 }
3566
3567 static struct trace_entry *
3568 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3569 unsigned long *lost_events)
3570 {
3571 struct ring_buffer_event *event;
3572 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3573
3574 if (buf_iter) {
3575 event = ring_buffer_iter_peek(buf_iter, ts);
3576 if (lost_events)
3577 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3578 (unsigned long)-1 : 0;
3579 } else {
3580 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3581 lost_events);
3582 }
3583
3584 if (event) {
3585 iter->ent_size = ring_buffer_event_length(event);
3586 return ring_buffer_event_data(event);
3587 }
3588 iter->ent_size = 0;
3589 return NULL;
3590 }
3591
3592 static struct trace_entry *
3593 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3594 unsigned long *missing_events, u64 *ent_ts)
3595 {
3596 struct trace_buffer *buffer = iter->array_buffer->buffer;
3597 struct trace_entry *ent, *next = NULL;
3598 unsigned long lost_events = 0, next_lost = 0;
3599 int cpu_file = iter->cpu_file;
3600 u64 next_ts = 0, ts;
3601 int next_cpu = -1;
3602 int next_size = 0;
3603 int cpu;
3604
3605 /*
3606 * If we are in a per_cpu trace file, don't bother iterating over
3607 * all CPUs; peek at that CPU directly.
3608 */
3609 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3610 if (ring_buffer_empty_cpu(buffer, cpu_file))
3611 return NULL;
3612 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3613 if (ent_cpu)
3614 *ent_cpu = cpu_file;
3615
3616 return ent;
3617 }
3618
3619 for_each_tracing_cpu(cpu) {
3620
3621 if (ring_buffer_empty_cpu(buffer, cpu))
3622 continue;
3623
3624 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3625
3626 /*
3627 * Pick the entry with the smallest timestamp:
3628 */
3629 if (ent && (!next || ts < next_ts)) {
3630 next = ent;
3631 next_cpu = cpu;
3632 next_ts = ts;
3633 next_lost = lost_events;
3634 next_size = iter->ent_size;
3635 }
3636 }
3637
3638 iter->ent_size = next_size;
3639
3640 if (ent_cpu)
3641 *ent_cpu = next_cpu;
3642
3643 if (ent_ts)
3644 *ent_ts = next_ts;
3645
3646 if (missing_events)
3647 *missing_events = next_lost;
3648
3649 return next;
3650 }
3651
3652 #define STATIC_FMT_BUF_SIZE 128
3653 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3654
3655 static char *trace_iter_expand_format(struct trace_iterator *iter)
3656 {
3657 char *tmp;
3658
3659 /*
3660 * iter->tr is NULL when used with tp_printk, which makes
3661 * this get called where it is not safe to call krealloc().
3662 */
3663 if (!iter->tr || iter->fmt == static_fmt_buf)
3664 return NULL;
3665
3666 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3667 GFP_KERNEL);
3668 if (tmp) {
3669 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3670 iter->fmt = tmp;
3671 }
3672
3673 return tmp;
3674 }
3675
3676 /* Returns true if the string is safe to dereference from an event */
3677 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3678 bool star, int len)
3679 {
3680 unsigned long addr = (unsigned long)str;
3681 struct trace_event *trace_event;
3682 struct trace_event_call *event;
3683
3684 /* Ignore strings with no length */
3685 if (star && !len)
3686 return true;
3687
3688 /* OK if part of the event data */
3689 if ((addr >= (unsigned long)iter->ent) &&
3690 (addr < (unsigned long)iter->ent + iter->ent_size))
3691 return true;
3692
3693 /* OK if part of the temp seq buffer */
3694 if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3695 (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3696 return true;
3697
3698 /* Core rodata cannot be freed */
3699 if (is_kernel_rodata(addr))
3700 return true;
3701
3702 if (trace_is_tracepoint_string(str))
3703 return true;
3704
3705 /*
3706 * Now this could be a module event, referencing core module
3707 * data, which is OK.
3708 */
3709 if (!iter->ent)
3710 return false;
3711
3712 trace_event = ftrace_find_event(iter->ent->type);
3713 if (!trace_event)
3714 return false;
3715
3716 event = container_of(trace_event, struct trace_event_call, event);
3717 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3718 return false;
3719
3720 /* Would rather have rodata, but this will suffice */
3721 if (within_module_core(addr, event->module))
3722 return true;
3723
3724 return false;
3725 }
3726
3727 static const char *show_buffer(struct trace_seq *s)
3728 {
3729 struct seq_buf *seq = &s->seq;
3730
3731 seq_buf_terminate(seq);
3732
3733 return seq->buffer;
3734 }
3735
3736 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3737
3738 static int test_can_verify_check(const char *fmt, ...)
3739 {
3740 char buf[16];
3741 va_list ap;
3742 int ret;
3743
3744 /*
3745 * The verifier depends on vsnprintf() modifying the va_list that is
3746 * passed to it, which happens when the va_list is passed by reference.
3747 * Some architectures (like x86_32) pass it by value, which means that
3748 * vsnprintf() does not modify the caller's va_list, and the verifier
3749 * would then need to understand all the values that vsnprintf() can
3750 * consume. If the va_list is passed by value, the verifier is simply
3751 * disabled.
3752 */
3753 va_start(ap, fmt);
3754 vsnprintf(buf, 16, "%d", ap);
3755 ret = va_arg(ap, int);
3756 va_end(ap);
3757
3758 return ret;
3759 }
3760
3761 static void test_can_verify(void)
3762 {
3763 if (!test_can_verify_check("%d %d", 0, 1)) {
3764 pr_info("trace event string verifier disabled\n");
3765 static_branch_inc(&trace_no_verify);
3766 }
3767 }
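/*
 * Concretely: test_can_verify_check("%d %d", 0, 1) lets vsnprintf() consume
 * the first integer (0). If the va_list was advanced (passed by reference),
 * the following va_arg() returns 1 and the verifier stays enabled; if the
 * va_list was passed by value, va_arg() sees the first argument (0) again
 * and the verifier is switched off.
 */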
3768
3769 /**
3770 * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3771 * @iter: The iterator that holds the seq buffer and the event being printed
3772 * @fmt: The format used to print the event
3773 * @ap: The va_list holding the data to print from @fmt.
3774 *
3775 * This writes the data into the @iter->seq buffer using the data from
3776 * @fmt and @ap. If the format has a %s, then the source of the string
3777 * is examined to make sure it is safe to print, otherwise it will
3778 * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3779 * pointer.
3780 */
3781 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3782 va_list ap)
3783 {
3784 const char *p = fmt;
3785 const char *str;
3786 int i, j;
3787
3788 if (WARN_ON_ONCE(!fmt))
3789 return;
3790
3791 if (static_branch_unlikely(&trace_no_verify))
3792 goto print;
3793
3794 /* Don't bother checking when doing a ftrace_dump() */
3795 if (iter->fmt == static_fmt_buf)
3796 goto print;
3797
3798 while (*p) {
3799 bool star = false;
3800 int len = 0;
3801
3802 j = 0;
3803
3804 /* We only care about %s and variants */
3805 for (i = 0; p[i]; i++) {
3806 if (i + 1 >= iter->fmt_size) {
3807 /*
3808 * If we can't expand the copy buffer,
3809 * just print it.
3810 */
3811 if (!trace_iter_expand_format(iter))
3812 goto print;
3813 }
3814
3815 if (p[i] == '\\' && p[i+1]) {
3816 i++;
3817 continue;
3818 }
3819 if (p[i] == '%') {
3820 /* Need to test cases like %08.*s */
3821 for (j = 1; p[i+j]; j++) {
3822 if (isdigit(p[i+j]) ||
3823 p[i+j] == '.')
3824 continue;
3825 if (p[i+j] == '*') {
3826 star = true;
3827 continue;
3828 }
3829 break;
3830 }
3831 if (p[i+j] == 's')
3832 break;
3833 star = false;
3834 }
3835 j = 0;
3836 }
3837 /* If no %s found then just print normally */
3838 if (!p[i])
3839 break;
3840
3841 /* Copy up to the %s, and print that */
3842 strncpy(iter->fmt, p, i);
3843 iter->fmt[i] = '\0';
3844 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3845
3846 /*
3847 * If iter->seq is full, the above call no longer guarantees
3848 * that ap is in sync with fmt processing, and further calls
3849 * to va_arg() can return wrong positional arguments.
3850 *
3851 * Ensure that ap is no longer used in this case.
3852 */
3853 if (iter->seq.full) {
3854 p = "";
3855 break;
3856 }
3857
3858 if (star)
3859 len = va_arg(ap, int);
3860
3861 /* The ap now points to the string data of the %s */
3862 str = va_arg(ap, const char *);
3863
3864 /*
3865 * If you hit this warning, it is likely that the
3866 * trace event in question used %s on a string that
3867 * was saved at the time of the event, but may not be
3868 * around when the trace is read. Use __string(),
3869 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3870 * instead. See samples/trace_events/trace-events-sample.h
3871 * for reference.
3872 */
3873 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3874 "fmt: '%s' current_buffer: '%s'",
3875 fmt, show_buffer(&iter->seq))) {
3876 int ret;
3877
3878 /* Try to safely read the string */
3879 if (star) {
3880 if (len + 1 > iter->fmt_size)
3881 len = iter->fmt_size - 1;
3882 if (len < 0)
3883 len = 0;
3884 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3885 iter->fmt[len] = 0;
3886 star = false;
3887 } else {
3888 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3889 iter->fmt_size);
3890 }
3891 if (ret < 0)
3892 trace_seq_printf(&iter->seq, "(0x%px)", str);
3893 else
3894 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3895 str, iter->fmt);
3896 str = "[UNSAFE-MEMORY]";
3897 strcpy(iter->fmt, "%s");
3898 } else {
3899 strncpy(iter->fmt, p + i, j + 1);
3900 iter->fmt[j+1] = '\0';
3901 }
3902 if (star)
3903 trace_seq_printf(&iter->seq, iter->fmt, len, str);
3904 else
3905 trace_seq_printf(&iter->seq, iter->fmt, str);
3906
3907 p += i + j + 1;
3908 }
3909 print:
3910 if (*p)
3911 trace_seq_vprintf(&iter->seq, p, ap);
3912 }
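/*
 * When the check above trips, the offending field is rendered roughly as
 * "(0x<address>:<recovered bytes>)[UNSAFE-MEMORY]" (the pointer is printed
 * with %px), and a one-time warning shows the format string together with
 * the current contents of the seq buffer.
 */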
3913
3914 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3915 {
3916 const char *p, *new_fmt;
3917 char *q;
3918
3919 if (WARN_ON_ONCE(!fmt))
3920 return fmt;
3921
3922 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3923 return fmt;
3924
3925 p = fmt;
3926 new_fmt = q = iter->fmt;
3927 while (*p) {
3928 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3929 if (!trace_iter_expand_format(iter))
3930 return fmt;
3931
3932 q += iter->fmt - new_fmt;
3933 new_fmt = iter->fmt;
3934 }
3935
3936 *q++ = *p++;
3937
3938 /* Replace %p with %px */
3939 if (p[-1] == '%') {
3940 if (p[0] == '%') {
3941 *q++ = *p++;
3942 } else if (p[0] == 'p' && !isalnum(p[1])) {
3943 *q++ = *p++;
3944 *q++ = 'x';
3945 }
3946 }
3947 }
3948 *q = '\0';
3949
3950 return new_fmt;
3951 }
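/*
 * For example, with hash-ptr disabled "addr=%p len=%d" is rewritten to
 * "addr=%px len=%d", while "%%p" and extended specifiers such as "%pI4"
 * are left untouched because the '%' is escaped or the character after
 * the 'p' is alphanumeric.
 */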
3952
3953 #define STATIC_TEMP_BUF_SIZE 128
3954 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3955
3956 /* Find the next real entry, without updating the iterator itself */
3957 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3958 int *ent_cpu, u64 *ent_ts)
3959 {
3960 /* __find_next_entry will reset ent_size */
3961 int ent_size = iter->ent_size;
3962 struct trace_entry *entry;
3963
3964 /*
3965 * If called from ftrace_dump(), then the iter->temp buffer
3966 * will be the static_temp_buf and not created from kmalloc.
3967 * If the entry size is greater than the buffer, we cannot
3968 * save it. Just return NULL in that case. This is only
3969 * used to add markers when two consecutive events' time
3970 * stamps have a large delta. See trace_print_lat_context().
3971 */
3972 if (iter->temp == static_temp_buf &&
3973 STATIC_TEMP_BUF_SIZE < ent_size)
3974 return NULL;
3975
3976 /*
3977 * The __find_next_entry() may call peek_next_entry(), which may
3978 * call ring_buffer_peek() that may make the contents of iter->ent
3979 * undefined. Need to copy iter->ent now.
3980 */
3981 if (iter->ent && iter->ent != iter->temp) {
3982 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3983 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3984 void *temp;
3985 temp = kmalloc(iter->ent_size, GFP_KERNEL);
3986 if (!temp)
3987 return NULL;
3988 kfree(iter->temp);
3989 iter->temp = temp;
3990 iter->temp_size = iter->ent_size;
3991 }
3992 memcpy(iter->temp, iter->ent, iter->ent_size);
3993 iter->ent = iter->temp;
3994 }
3995 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3996 /* Put back the original ent_size */
3997 iter->ent_size = ent_size;
3998
3999 return entry;
4000 }
4001
4002 /* Find the next real entry, and increment the iterator to the next entry */
4003 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4004 {
4005 iter->ent = __find_next_entry(iter, &iter->cpu,
4006 &iter->lost_events, &iter->ts);
4007
4008 if (iter->ent)
4009 trace_iterator_increment(iter);
4010
4011 return iter->ent ? iter : NULL;
4012 }
4013
4014 static void trace_consume(struct trace_iterator *iter)
4015 {
4016 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4017 &iter->lost_events);
4018 }
4019
4020 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4021 {
4022 struct trace_iterator *iter = m->private;
4023 int i = (int)*pos;
4024 void *ent;
4025
4026 WARN_ON_ONCE(iter->leftover);
4027
4028 (*pos)++;
4029
4030 /* can't go backwards */
4031 if (iter->idx > i)
4032 return NULL;
4033
4034 if (iter->idx < 0)
4035 ent = trace_find_next_entry_inc(iter);
4036 else
4037 ent = iter;
4038
4039 while (ent && iter->idx < i)
4040 ent = trace_find_next_entry_inc(iter);
4041
4042 iter->pos = *pos;
4043
4044 return ent;
4045 }
4046
4047 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4048 {
4049 struct ring_buffer_iter *buf_iter;
4050 unsigned long entries = 0;
4051 u64 ts;
4052
4053 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4054
4055 buf_iter = trace_buffer_iter(iter, cpu);
4056 if (!buf_iter)
4057 return;
4058
4059 ring_buffer_iter_reset(buf_iter);
4060
4061 /*
4062 * We could have the case with the max latency tracers
4063 * that a reset never took place on a cpu. This is evident
4064 * from the timestamp being before the start of the buffer.
4065 */
4066 while (ring_buffer_iter_peek(buf_iter, &ts)) {
4067 if (ts >= iter->array_buffer->time_start)
4068 break;
4069 entries++;
4070 ring_buffer_iter_advance(buf_iter);
4071 }
4072
4073 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4074 }
4075
4076 /*
4077 * The current tracer is copied to avoid taking a global lock
4078 * all around.
4079 */
4080 static void *s_start(struct seq_file *m, loff_t *pos)
4081 {
4082 struct trace_iterator *iter = m->private;
4083 struct trace_array *tr = iter->tr;
4084 int cpu_file = iter->cpu_file;
4085 void *p = NULL;
4086 loff_t l = 0;
4087 int cpu;
4088
4089 /*
4090 * copy the tracer to avoid using a global lock all around.
4091 * iter->trace is a copy of current_trace, the pointer to the
4092 * name may be used instead of a strcmp(), as iter->trace->name
4093 * will point to the same string as current_trace->name.
4094 */
4095 mutex_lock(&trace_types_lock);
4096 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
4097 /* Close iter->trace before switching to the new current tracer */
4098 if (iter->trace->close)
4099 iter->trace->close(iter);
4100 *iter->trace = *tr->current_trace;
4101 /* Reopen the new current tracer */
4102 if (iter->trace->open)
4103 iter->trace->open(iter);
4104 }
4105 mutex_unlock(&trace_types_lock);
4106
4107 #ifdef CONFIG_TRACER_MAX_TRACE
4108 if (iter->snapshot && iter->trace->use_max_tr)
4109 return ERR_PTR(-EBUSY);
4110 #endif
4111
4112 if (*pos != iter->pos) {
4113 iter->ent = NULL;
4114 iter->cpu = 0;
4115 iter->idx = -1;
4116
4117 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4118 for_each_tracing_cpu(cpu)
4119 tracing_iter_reset(iter, cpu);
4120 } else
4121 tracing_iter_reset(iter, cpu_file);
4122
4123 iter->leftover = 0;
4124 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4125 ;
4126
4127 } else {
4128 /*
4129 * If we overflowed the seq_file before, then we want
4130 * to just reuse the trace_seq buffer again.
4131 */
4132 if (iter->leftover)
4133 p = iter;
4134 else {
4135 l = *pos - 1;
4136 p = s_next(m, p, &l);
4137 }
4138 }
4139
4140 trace_event_read_lock();
4141 trace_access_lock(cpu_file);
4142 return p;
4143 }
4144
4145 static void s_stop(struct seq_file *m, void *p)
4146 {
4147 struct trace_iterator *iter = m->private;
4148
4149 #ifdef CONFIG_TRACER_MAX_TRACE
4150 if (iter->snapshot && iter->trace->use_max_tr)
4151 return;
4152 #endif
4153
4154 trace_access_unlock(iter->cpu_file);
4155 trace_event_read_unlock();
4156 }
4157
4158 static void
4159 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4160 unsigned long *entries, int cpu)
4161 {
4162 unsigned long count;
4163
4164 count = ring_buffer_entries_cpu(buf->buffer, cpu);
4165 /*
4166 * If this buffer has skipped entries, then we hold all
4167 * entries for the trace and we need to ignore the
4168 * ones before the time stamp.
4169 */
4170 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4171 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4172 /* total is the same as the entries */
4173 *total = count;
4174 } else
4175 *total = count +
4176 ring_buffer_overrun_cpu(buf->buffer, cpu);
4177 *entries = count;
4178 }
4179
4180 static void
4181 get_total_entries(struct array_buffer *buf,
4182 unsigned long *total, unsigned long *entries)
4183 {
4184 unsigned long t, e;
4185 int cpu;
4186
4187 *total = 0;
4188 *entries = 0;
4189
4190 for_each_tracing_cpu(cpu) {
4191 get_total_entries_cpu(buf, &t, &e, cpu);
4192 *total += t;
4193 *entries += e;
4194 }
4195 }
4196
4197 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4198 {
4199 unsigned long total, entries;
4200
4201 if (!tr)
4202 tr = &global_trace;
4203
4204 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4205
4206 return entries;
4207 }
4208
4209 unsigned long trace_total_entries(struct trace_array *tr)
4210 {
4211 unsigned long total, entries;
4212
4213 if (!tr)
4214 tr = &global_trace;
4215
4216 get_total_entries(&tr->array_buffer, &total, &entries);
4217
4218 return entries;
4219 }
4220
4221 static void print_lat_help_header(struct seq_file *m)
4222 {
4223 seq_puts(m, "# _------=> CPU# \n"
4224 "# / _-----=> irqs-off \n"
4225 "# | / _----=> need-resched \n"
4226 "# || / _---=> hardirq/softirq \n"
4227 "# ||| / _--=> preempt-depth \n"
4228 "# |||| / _-=> migrate-disable \n"
4229 "# ||||| / delay \n"
4230 "# cmd pid |||||| time | caller \n"
4231 "# \\ / |||||| \\ | / \n");
4232 }
4233
4234 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4235 {
4236 unsigned long total;
4237 unsigned long entries;
4238
4239 get_total_entries(buf, &total, &entries);
4240 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
4241 entries, total, num_online_cpus());
4242 seq_puts(m, "#\n");
4243 }
4244
4245 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4246 unsigned int flags)
4247 {
4248 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4249
4250 print_event_info(buf, m);
4251
4252 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
4253 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
4254 }
4255
4256 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4257 unsigned int flags)
4258 {
4259 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4260 const char *space = " ";
4261 int prec = tgid ? 12 : 2;
4262
4263 print_event_info(buf, m);
4264
4265 seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
4266 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
4267 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
4268 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
4269 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
4270 seq_printf(m, "# %.*s|||| / delay\n", prec, space);
4271 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
4272 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
4273 }
4274
4275 void
4276 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4277 {
4278 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4279 struct array_buffer *buf = iter->array_buffer;
4280 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4281 struct tracer *type = iter->trace;
4282 unsigned long entries;
4283 unsigned long total;
4284 const char *name = "preemption";
4285
4286 name = type->name;
4287
4288 get_total_entries(buf, &total, &entries);
4289
4290 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4291 name, UTS_RELEASE);
4292 seq_puts(m, "# -----------------------------------"
4293 "---------------------------------\n");
4294 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4295 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4296 nsecs_to_usecs(data->saved_latency),
4297 entries,
4298 total,
4299 buf->cpu,
4300 #if defined(CONFIG_PREEMPT_NONE)
4301 "server",
4302 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4303 "desktop",
4304 #elif defined(CONFIG_PREEMPT)
4305 "preempt",
4306 #elif defined(CONFIG_PREEMPT_RT)
4307 "preempt_rt",
4308 #else
4309 "unknown",
4310 #endif
4311 /* These are reserved for later use */
4312 0, 0, 0, 0);
4313 #ifdef CONFIG_SMP
4314 seq_printf(m, " #P:%d)\n", num_online_cpus());
4315 #else
4316 seq_puts(m, ")\n");
4317 #endif
4318 seq_puts(m, "# -----------------\n");
4319 seq_printf(m, "# | task: %.16s-%d "
4320 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4321 data->comm, data->pid,
4322 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4323 data->policy, data->rt_priority);
4324 seq_puts(m, "# -----------------\n");
4325
4326 if (data->critical_start) {
4327 seq_puts(m, "# => started at: ");
4328 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4329 trace_print_seq(m, &iter->seq);
4330 seq_puts(m, "\n# => ended at: ");
4331 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4332 trace_print_seq(m, &iter->seq);
4333 seq_puts(m, "\n#\n");
4334 }
4335
4336 seq_puts(m, "#\n");
4337 }
4338
4339 static void test_cpu_buff_start(struct trace_iterator *iter)
4340 {
4341 struct trace_seq *s = &iter->seq;
4342 struct trace_array *tr = iter->tr;
4343
4344 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4345 return;
4346
4347 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4348 return;
4349
4350 if (cpumask_available(iter->started) &&
4351 cpumask_test_cpu(iter->cpu, iter->started))
4352 return;
4353
4354 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4355 return;
4356
4357 if (cpumask_available(iter->started))
4358 cpumask_set_cpu(iter->cpu, iter->started);
4359
4360 /* Don't print started cpu buffer for the first entry of the trace */
4361 if (iter->idx > 1)
4362 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4363 iter->cpu);
4364 }
4365
4366 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4367 {
4368 struct trace_array *tr = iter->tr;
4369 struct trace_seq *s = &iter->seq;
4370 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4371 struct trace_entry *entry;
4372 struct trace_event *event;
4373
4374 entry = iter->ent;
4375
4376 test_cpu_buff_start(iter);
4377
4378 event = ftrace_find_event(entry->type);
4379
4380 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4381 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4382 trace_print_lat_context(iter);
4383 else
4384 trace_print_context(iter);
4385 }
4386
4387 if (trace_seq_has_overflowed(s))
4388 return TRACE_TYPE_PARTIAL_LINE;
4389
4390 if (event)
4391 return event->funcs->trace(iter, sym_flags, event);
4392
4393 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4394
4395 return trace_handle_return(s);
4396 }
4397
4398 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4399 {
4400 struct trace_array *tr = iter->tr;
4401 struct trace_seq *s = &iter->seq;
4402 struct trace_entry *entry;
4403 struct trace_event *event;
4404
4405 entry = iter->ent;
4406
4407 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4408 trace_seq_printf(s, "%d %d %llu ",
4409 entry->pid, iter->cpu, iter->ts);
4410
4411 if (trace_seq_has_overflowed(s))
4412 return TRACE_TYPE_PARTIAL_LINE;
4413
4414 event = ftrace_find_event(entry->type);
4415 if (event)
4416 return event->funcs->raw(iter, 0, event);
4417
4418 trace_seq_printf(s, "%d ?\n", entry->type);
4419
4420 return trace_handle_return(s);
4421 }
4422
4423 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4424 {
4425 struct trace_array *tr = iter->tr;
4426 struct trace_seq *s = &iter->seq;
4427 unsigned char newline = '\n';
4428 struct trace_entry *entry;
4429 struct trace_event *event;
4430
4431 entry = iter->ent;
4432
4433 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4434 SEQ_PUT_HEX_FIELD(s, entry->pid);
4435 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4436 SEQ_PUT_HEX_FIELD(s, iter->ts);
4437 if (trace_seq_has_overflowed(s))
4438 return TRACE_TYPE_PARTIAL_LINE;
4439 }
4440
4441 event = ftrace_find_event(entry->type);
4442 if (event) {
4443 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4444 if (ret != TRACE_TYPE_HANDLED)
4445 return ret;
4446 }
4447
4448 SEQ_PUT_FIELD(s, newline);
4449
4450 return trace_handle_return(s);
4451 }
4452
4453 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4454 {
4455 struct trace_array *tr = iter->tr;
4456 struct trace_seq *s = &iter->seq;
4457 struct trace_entry *entry;
4458 struct trace_event *event;
4459
4460 entry = iter->ent;
4461
4462 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4463 SEQ_PUT_FIELD(s, entry->pid);
4464 SEQ_PUT_FIELD(s, iter->cpu);
4465 SEQ_PUT_FIELD(s, iter->ts);
4466 if (trace_seq_has_overflowed(s))
4467 return TRACE_TYPE_PARTIAL_LINE;
4468 }
4469
4470 event = ftrace_find_event(entry->type);
4471 return event ? event->funcs->binary(iter, 0, event) :
4472 TRACE_TYPE_HANDLED;
4473 }
4474
4475 int trace_empty(struct trace_iterator *iter)
4476 {
4477 struct ring_buffer_iter *buf_iter;
4478 int cpu;
4479
4480 /* If we are looking at one CPU buffer, only check that one */
4481 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4482 cpu = iter->cpu_file;
4483 buf_iter = trace_buffer_iter(iter, cpu);
4484 if (buf_iter) {
4485 if (!ring_buffer_iter_empty(buf_iter))
4486 return 0;
4487 } else {
4488 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4489 return 0;
4490 }
4491 return 1;
4492 }
4493
4494 for_each_tracing_cpu(cpu) {
4495 buf_iter = trace_buffer_iter(iter, cpu);
4496 if (buf_iter) {
4497 if (!ring_buffer_iter_empty(buf_iter))
4498 return 0;
4499 } else {
4500 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4501 return 0;
4502 }
4503 }
4504
4505 return 1;
4506 }
4507
4508 /* Called with trace_event_read_lock() held. */
4509 enum print_line_t print_trace_line(struct trace_iterator *iter)
4510 {
4511 struct trace_array *tr = iter->tr;
4512 unsigned long trace_flags = tr->trace_flags;
4513 enum print_line_t ret;
4514
4515 if (iter->lost_events) {
4516 if (iter->lost_events == (unsigned long)-1)
4517 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4518 iter->cpu);
4519 else
4520 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4521 iter->cpu, iter->lost_events);
4522 if (trace_seq_has_overflowed(&iter->seq))
4523 return TRACE_TYPE_PARTIAL_LINE;
4524 }
4525
4526 if (iter->trace && iter->trace->print_line) {
4527 ret = iter->trace->print_line(iter);
4528 if (ret != TRACE_TYPE_UNHANDLED)
4529 return ret;
4530 }
4531
4532 if (iter->ent->type == TRACE_BPUTS &&
4533 trace_flags & TRACE_ITER_PRINTK &&
4534 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4535 return trace_print_bputs_msg_only(iter);
4536
4537 if (iter->ent->type == TRACE_BPRINT &&
4538 trace_flags & TRACE_ITER_PRINTK &&
4539 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4540 return trace_print_bprintk_msg_only(iter);
4541
4542 if (iter->ent->type == TRACE_PRINT &&
4543 trace_flags & TRACE_ITER_PRINTK &&
4544 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4545 return trace_print_printk_msg_only(iter);
4546
4547 if (trace_flags & TRACE_ITER_BIN)
4548 return print_bin_fmt(iter);
4549
4550 if (trace_flags & TRACE_ITER_HEX)
4551 return print_hex_fmt(iter);
4552
4553 if (trace_flags & TRACE_ITER_RAW)
4554 return print_raw_fmt(iter);
4555
4556 return print_trace_fmt(iter);
4557 }
4558
4559 void trace_latency_header(struct seq_file *m)
4560 {
4561 struct trace_iterator *iter = m->private;
4562 struct trace_array *tr = iter->tr;
4563
4564 /* print nothing if the buffers are empty */
4565 if (trace_empty(iter))
4566 return;
4567
4568 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4569 print_trace_header(m, iter);
4570
4571 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4572 print_lat_help_header(m);
4573 }
4574
4575 void trace_default_header(struct seq_file *m)
4576 {
4577 struct trace_iterator *iter = m->private;
4578 struct trace_array *tr = iter->tr;
4579 unsigned long trace_flags = tr->trace_flags;
4580
4581 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4582 return;
4583
4584 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4585 /* print nothing if the buffers are empty */
4586 if (trace_empty(iter))
4587 return;
4588 print_trace_header(m, iter);
4589 if (!(trace_flags & TRACE_ITER_VERBOSE))
4590 print_lat_help_header(m);
4591 } else {
4592 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4593 if (trace_flags & TRACE_ITER_IRQ_INFO)
4594 print_func_help_header_irq(iter->array_buffer,
4595 m, trace_flags);
4596 else
4597 print_func_help_header(iter->array_buffer, m,
4598 trace_flags);
4599 }
4600 }
4601 }
4602
4603 static void test_ftrace_alive(struct seq_file *m)
4604 {
4605 if (!ftrace_is_dead())
4606 return;
4607 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4608 "# MAY BE MISSING FUNCTION EVENTS\n");
4609 }
4610
4611 #ifdef CONFIG_TRACER_MAX_TRACE
4612 static void show_snapshot_main_help(struct seq_file *m)
4613 {
4614 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4615 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4616 "# Takes a snapshot of the main buffer.\n"
4617 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4618 "# (Doesn't have to be '2' works with any number that\n"
4619 "# is not a '0' or '1')\n");
4620 }
4621
4622 static void show_snapshot_percpu_help(struct seq_file *m)
4623 {
4624 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4625 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4626 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4627 "# Takes a snapshot of the main buffer for this cpu.\n");
4628 #else
4629 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4630 "# Must use main snapshot file to allocate.\n");
4631 #endif
4632 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4633 "# (Doesn't have to be '2' works with any number that\n"
4634 "# is not a '0' or '1')\n");
4635 }
4636
4637 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4638 {
4639 if (iter->tr->allocated_snapshot)
4640 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4641 else
4642 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4643
4644 seq_puts(m, "# Snapshot commands:\n");
4645 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4646 show_snapshot_main_help(m);
4647 else
4648 show_snapshot_percpu_help(m);
4649 }
4650 #else
4651 /* Should never be called */
4652 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4653 #endif
4654
4655 static int s_show(struct seq_file *m, void *v)
4656 {
4657 struct trace_iterator *iter = v;
4658 int ret;
4659
4660 if (iter->ent == NULL) {
4661 if (iter->tr) {
4662 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4663 seq_puts(m, "#\n");
4664 test_ftrace_alive(m);
4665 }
4666 if (iter->snapshot && trace_empty(iter))
4667 print_snapshot_help(m, iter);
4668 else if (iter->trace && iter->trace->print_header)
4669 iter->trace->print_header(m);
4670 else
4671 trace_default_header(m);
4672
4673 } else if (iter->leftover) {
4674 /*
4675 * If we filled the seq_file buffer earlier, we
4676 * want to just show it now.
4677 */
4678 ret = trace_print_seq(m, &iter->seq);
4679
4680 /* ret should this time be zero, but you never know */
4681 iter->leftover = ret;
4682
4683 } else {
4684 ret = print_trace_line(iter);
4685 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4686 iter->seq.full = 0;
4687 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4688 }
4689 ret = trace_print_seq(m, &iter->seq);
4690 /*
4691 * If we overflow the seq_file buffer, then it will
4692 * ask us for this data again at start up.
4693 * Use that instead.
4694 * ret is 0 if seq_file write succeeded.
4695 * -1 otherwise.
4696 */
4697 iter->leftover = ret;
4698 }
4699
4700 return 0;
4701 }
4702
4703 /*
4704 * Should be used after trace_array_get(), trace_types_lock
4705 * ensures that i_cdev was already initialized.
4706 */
4707 static inline int tracing_get_cpu(struct inode *inode)
4708 {
4709 if (inode->i_cdev) /* See trace_create_cpu_file() */
4710 return (long)inode->i_cdev - 1;
4711 return RING_BUFFER_ALL_CPUS;
4712 }
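/*
 * trace_create_cpu_file() stores cpu + 1 in i_cdev, so a per-CPU file such
 * as per_cpu/cpu2/trace resolves to 2 here, while the top level files
 * (i_cdev left NULL) resolve to RING_BUFFER_ALL_CPUS.
 */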
4713
4714 static const struct seq_operations tracer_seq_ops = {
4715 .start = s_start,
4716 .next = s_next,
4717 .stop = s_stop,
4718 .show = s_show,
4719 };
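/*
 * These callbacks are driven by seq_read() on the "trace" file: s_start()
 * positions the iterator and takes the read locks, s_show()/s_next() print
 * and advance one entry at a time, and s_stop() releases the locks.
 */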
4720
4721 static struct trace_iterator *
4722 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4723 {
4724 struct trace_array *tr = inode->i_private;
4725 struct trace_iterator *iter;
4726 int cpu;
4727
4728 if (tracing_disabled)
4729 return ERR_PTR(-ENODEV);
4730
4731 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4732 if (!iter)
4733 return ERR_PTR(-ENOMEM);
4734
4735 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4736 GFP_KERNEL);
4737 if (!iter->buffer_iter)
4738 goto release;
4739
4740 /*
4741 * trace_find_next_entry() may need to save off iter->ent.
4742 * It will place it into the iter->temp buffer. As most
4743 * events are less than 128, allocate a buffer of that size.
4744 * If one is greater, then trace_find_next_entry() will
4745 * allocate a new buffer to adjust for the bigger iter->ent.
4746 * It's not critical if it fails to get allocated here.
4747 */
4748 iter->temp = kmalloc(128, GFP_KERNEL);
4749 if (iter->temp)
4750 iter->temp_size = 128;
4751
4752 /*
4753 * trace_event_printf() may need to modify the given format
4754 * string to replace %p with %px so that it shows the real address
4755 * instead of a hashed value. However, that is only needed for
4756 * event tracing; other tracers may not need it. Defer the
4757 * allocation until it is needed.
4758 */
4759 iter->fmt = NULL;
4760 iter->fmt_size = 0;
4761
4762 /*
4763 * We make a copy of the current tracer to avoid concurrent
4764 * changes on it while we are reading.
4765 */
4766 mutex_lock(&trace_types_lock);
4767 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4768 if (!iter->trace)
4769 goto fail;
4770
4771 *iter->trace = *tr->current_trace;
4772
4773 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4774 goto fail;
4775
4776 iter->tr = tr;
4777
4778 #ifdef CONFIG_TRACER_MAX_TRACE
4779 /* Currently only the top directory has a snapshot */
4780 if (tr->current_trace->print_max || snapshot)
4781 iter->array_buffer = &tr->max_buffer;
4782 else
4783 #endif
4784 iter->array_buffer = &tr->array_buffer;
4785 iter->snapshot = snapshot;
4786 iter->pos = -1;
4787 iter->cpu_file = tracing_get_cpu(inode);
4788 mutex_init(&iter->mutex);
4789
4790 /* Notify the tracer early; before we stop tracing. */
4791 if (iter->trace->open)
4792 iter->trace->open(iter);
4793
4794 /* Annotate start of buffers if we had overruns */
4795 if (ring_buffer_overruns(iter->array_buffer->buffer))
4796 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4797
4798 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4799 if (trace_clocks[tr->clock_id].in_ns)
4800 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4801
4802 /*
4803 * If pause-on-trace is enabled, then stop the trace while
4804 * dumping, unless this is the "snapshot" file
4805 */
4806 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4807 tracing_stop_tr(tr);
4808
4809 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4810 for_each_tracing_cpu(cpu) {
4811 iter->buffer_iter[cpu] =
4812 ring_buffer_read_prepare(iter->array_buffer->buffer,
4813 cpu, GFP_KERNEL);
4814 }
4815 ring_buffer_read_prepare_sync();
4816 for_each_tracing_cpu(cpu) {
4817 ring_buffer_read_start(iter->buffer_iter[cpu]);
4818 tracing_iter_reset(iter, cpu);
4819 }
4820 } else {
4821 cpu = iter->cpu_file;
4822 iter->buffer_iter[cpu] =
4823 ring_buffer_read_prepare(iter->array_buffer->buffer,
4824 cpu, GFP_KERNEL);
4825 ring_buffer_read_prepare_sync();
4826 ring_buffer_read_start(iter->buffer_iter[cpu]);
4827 tracing_iter_reset(iter, cpu);
4828 }
4829
4830 mutex_unlock(&trace_types_lock);
4831
4832 return iter;
4833
4834 fail:
4835 mutex_unlock(&trace_types_lock);
4836 kfree(iter->trace);
4837 kfree(iter->temp);
4838 kfree(iter->buffer_iter);
4839 release:
4840 seq_release_private(inode, file);
4841 return ERR_PTR(-ENOMEM);
4842 }
4843
4844 int tracing_open_generic(struct inode *inode, struct file *filp)
4845 {
4846 int ret;
4847
4848 ret = tracing_check_open_get_tr(NULL);
4849 if (ret)
4850 return ret;
4851
4852 filp->private_data = inode->i_private;
4853 return 0;
4854 }
4855
4856 bool tracing_is_disabled(void)
4857 {
4858 return (tracing_disabled) ? true: false;
4859 }
4860
4861 /*
4862 * Open and update trace_array ref count.
4863 * Must have the current trace_array passed to it.
4864 */
4865 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4866 {
4867 struct trace_array *tr = inode->i_private;
4868 int ret;
4869
4870 ret = tracing_check_open_get_tr(tr);
4871 if (ret)
4872 return ret;
4873
4874 filp->private_data = inode->i_private;
4875
4876 return 0;
4877 }
4878
4879 /*
4880 * The private pointer of the inode is the trace_event_file.
4881 * Update the tr ref count associated to it.
4882 */
4883 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4884 {
4885 struct trace_event_file *file = inode->i_private;
4886 int ret;
4887
4888 ret = tracing_check_open_get_tr(file->tr);
4889 if (ret)
4890 return ret;
4891
4892 filp->private_data = inode->i_private;
4893
4894 return 0;
4895 }
4896
4897 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4898 {
4899 struct trace_event_file *file = inode->i_private;
4900
4901 trace_array_put(file->tr);
4902
4903 return 0;
4904 }
4905
4906 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4907 {
4908 tracing_release_file_tr(inode, filp);
4909 return single_release(inode, filp);
4910 }
4911
4912 static int tracing_mark_open(struct inode *inode, struct file *filp)
4913 {
4914 stream_open(inode, filp);
4915 return tracing_open_generic_tr(inode, filp);
4916 }
4917
4918 static int tracing_release(struct inode *inode, struct file *file)
4919 {
4920 struct trace_array *tr = inode->i_private;
4921 struct seq_file *m = file->private_data;
4922 struct trace_iterator *iter;
4923 int cpu;
4924
4925 if (!(file->f_mode & FMODE_READ)) {
4926 trace_array_put(tr);
4927 return 0;
4928 }
4929
4930 /* Writes do not use seq_file */
4931 iter = m->private;
4932 mutex_lock(&trace_types_lock);
4933
4934 for_each_tracing_cpu(cpu) {
4935 if (iter->buffer_iter[cpu])
4936 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4937 }
4938
4939 if (iter->trace && iter->trace->close)
4940 iter->trace->close(iter);
4941
4942 if (!iter->snapshot && tr->stop_count)
4943 /* reenable tracing if it was previously enabled */
4944 tracing_start_tr(tr);
4945
4946 __trace_array_put(tr);
4947
4948 mutex_unlock(&trace_types_lock);
4949
4950 mutex_destroy(&iter->mutex);
4951 free_cpumask_var(iter->started);
4952 kfree(iter->fmt);
4953 kfree(iter->temp);
4954 kfree(iter->trace);
4955 kfree(iter->buffer_iter);
4956 seq_release_private(inode, file);
4957
4958 return 0;
4959 }
4960
4961 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4962 {
4963 struct trace_array *tr = inode->i_private;
4964
4965 trace_array_put(tr);
4966 return 0;
4967 }
4968
4969 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4970 {
4971 struct trace_array *tr = inode->i_private;
4972
4973 trace_array_put(tr);
4974
4975 return single_release(inode, file);
4976 }
4977
4978 static int tracing_open(struct inode *inode, struct file *file)
4979 {
4980 struct trace_array *tr = inode->i_private;
4981 struct trace_iterator *iter;
4982 int ret;
4983
4984 ret = tracing_check_open_get_tr(tr);
4985 if (ret)
4986 return ret;
4987
4988 /* If this file was open for write, then erase contents */
4989 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4990 int cpu = tracing_get_cpu(inode);
4991 struct array_buffer *trace_buf = &tr->array_buffer;
4992
4993 #ifdef CONFIG_TRACER_MAX_TRACE
4994 if (tr->current_trace->print_max)
4995 trace_buf = &tr->max_buffer;
4996 #endif
4997
4998 if (cpu == RING_BUFFER_ALL_CPUS)
4999 tracing_reset_online_cpus(trace_buf);
5000 else
5001 tracing_reset_cpu(trace_buf, cpu);
5002 }
5003
5004 if (file->f_mode & FMODE_READ) {
5005 iter = __tracing_open(inode, file, false);
5006 if (IS_ERR(iter))
5007 ret = PTR_ERR(iter);
5008 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5009 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5010 }
5011
5012 if (ret < 0)
5013 trace_array_put(tr);
5014
5015 return ret;
5016 }
5017
5018 /*
5019 * Some tracers are not suitable for instance buffers.
5020 * A tracer is always available for the global array (toplevel)
5021 * or if it explicitly states that it is.
5022 */
5023 static bool
5024 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5025 {
5026 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5027 }
5028
5029 /* Find the next tracer that this trace array may use */
5030 static struct tracer *
5031 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5032 {
5033 while (t && !trace_ok_for_array(t, tr))
5034 t = t->next;
5035
5036 return t;
5037 }
5038
5039 static void *
5040 t_next(struct seq_file *m, void *v, loff_t *pos)
5041 {
5042 struct trace_array *tr = m->private;
5043 struct tracer *t = v;
5044
5045 (*pos)++;
5046
5047 if (t)
5048 t = get_tracer_for_array(tr, t->next);
5049
5050 return t;
5051 }
5052
5053 static void *t_start(struct seq_file *m, loff_t *pos)
5054 {
5055 struct trace_array *tr = m->private;
5056 struct tracer *t;
5057 loff_t l = 0;
5058
5059 mutex_lock(&trace_types_lock);
5060
5061 t = get_tracer_for_array(tr, trace_types);
5062 for (; t && l < *pos; t = t_next(m, t, &l))
5063 ;
5064
5065 return t;
5066 }
5067
5068 static void t_stop(struct seq_file *m, void *p)
5069 {
5070 mutex_unlock(&trace_types_lock);
5071 }
5072
5073 static int t_show(struct seq_file *m, void *v)
5074 {
5075 struct tracer *t = v;
5076
5077 if (!t)
5078 return 0;
5079
5080 seq_puts(m, t->name);
5081 if (t->next)
5082 seq_putc(m, ' ');
5083 else
5084 seq_putc(m, '\n');
5085
5086 return 0;
5087 }
5088
5089 static const struct seq_operations show_traces_seq_ops = {
5090 .start = t_start,
5091 .next = t_next,
5092 .stop = t_stop,
5093 .show = t_show,
5094 };
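/*
 * Reading "available_tracers" walks the registered tracers through these
 * callbacks and yields a single space-separated line, for example
 * "function_graph function nop" (the exact list depends on the kernel
 * configuration and on which tracers this instance allows).
 */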
5095
5096 static int show_traces_open(struct inode *inode, struct file *file)
5097 {
5098 struct trace_array *tr = inode->i_private;
5099 struct seq_file *m;
5100 int ret;
5101
5102 ret = tracing_check_open_get_tr(tr);
5103 if (ret)
5104 return ret;
5105
5106 ret = seq_open(file, &show_traces_seq_ops);
5107 if (ret) {
5108 trace_array_put(tr);
5109 return ret;
5110 }
5111
5112 m = file->private_data;
5113 m->private = tr;
5114
5115 return 0;
5116 }
5117
5118 static int show_traces_release(struct inode *inode, struct file *file)
5119 {
5120 struct trace_array *tr = inode->i_private;
5121
5122 trace_array_put(tr);
5123 return seq_release(inode, file);
5124 }
5125
5126 static ssize_t
5127 tracing_write_stub(struct file *filp, const char __user *ubuf,
5128 size_t count, loff_t *ppos)
5129 {
5130 return count;
5131 }
5132
5133 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5134 {
5135 int ret;
5136
5137 if (file->f_mode & FMODE_READ)
5138 ret = seq_lseek(file, offset, whence);
5139 else
5140 file->f_pos = ret = 0;
5141
5142 return ret;
5143 }
5144
5145 static const struct file_operations tracing_fops = {
5146 .open = tracing_open,
5147 .read = seq_read,
5148 .read_iter = seq_read_iter,
5149 .splice_read = generic_file_splice_read,
5150 .write = tracing_write_stub,
5151 .llseek = tracing_lseek,
5152 .release = tracing_release,
5153 };
5154
5155 static const struct file_operations show_traces_fops = {
5156 .open = show_traces_open,
5157 .read = seq_read,
5158 .llseek = seq_lseek,
5159 .release = show_traces_release,
5160 };
5161
5162 static ssize_t
5163 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5164 size_t count, loff_t *ppos)
5165 {
5166 struct trace_array *tr = file_inode(filp)->i_private;
5167 char *mask_str;
5168 int len;
5169
5170 len = snprintf(NULL, 0, "%*pb\n",
5171 cpumask_pr_args(tr->tracing_cpumask)) + 1;
5172 mask_str = kmalloc(len, GFP_KERNEL);
5173 if (!mask_str)
5174 return -ENOMEM;
5175
5176 len = snprintf(mask_str, len, "%*pb\n",
5177 cpumask_pr_args(tr->tracing_cpumask));
5178 if (len >= count) {
5179 count = -EINVAL;
5180 goto out_err;
5181 }
5182 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5183
5184 out_err:
5185 kfree(mask_str);
5186
5187 return count;
5188 }
5189
5190 int tracing_set_cpumask(struct trace_array *tr,
5191 cpumask_var_t tracing_cpumask_new)
5192 {
5193 int cpu;
5194
5195 if (!tr)
5196 return -EINVAL;
5197
5198 local_irq_disable();
5199 arch_spin_lock(&tr->max_lock);
5200 for_each_tracing_cpu(cpu) {
5201 /*
5202 * Increase/decrease the disabled counter if we are
5203 * about to flip a bit in the cpumask:
5204 */
5205 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5206 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5207 atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5208 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5209 #ifdef CONFIG_TRACER_MAX_TRACE
5210 ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5211 #endif
5212 }
5213 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5214 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5215 atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5216 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5217 #ifdef CONFIG_TRACER_MAX_TRACE
5218 ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5219 #endif
5220 }
5221 }
5222 arch_spin_unlock(&tr->max_lock);
5223 local_irq_enable();
5224
5225 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5226
5227 return 0;
5228 }
5229
5230 static ssize_t
5231 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5232 size_t count, loff_t *ppos)
5233 {
5234 struct trace_array *tr = file_inode(filp)->i_private;
5235 cpumask_var_t tracing_cpumask_new;
5236 int err;
5237
5238 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5239 return -ENOMEM;
5240
5241 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5242 if (err)
5243 goto err_free;
5244
5245 err = tracing_set_cpumask(tr, tracing_cpumask_new);
5246 if (err)
5247 goto err_free;
5248
5249 free_cpumask_var(tracing_cpumask_new);
5250
5251 return count;
5252
5253 err_free:
5254 free_cpumask_var(tracing_cpumask_new);
5255
5256 return err;
5257 }
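/*
 * Illustration (mask values depend on the system): on a 4-CPU machine
 *
 *	# echo 5 > tracing_cpumask
 *
 * restricts tracing to CPUs 0 and 2, and reading the file back reports the
 * mask in the same hex cpumask format.
 */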
5258
5259 static const struct file_operations tracing_cpumask_fops = {
5260 .open = tracing_open_generic_tr,
5261 .read = tracing_cpumask_read,
5262 .write = tracing_cpumask_write,
5263 .release = tracing_release_generic_tr,
5264 .llseek = generic_file_llseek,
5265 };
5266
5267 static int tracing_trace_options_show(struct seq_file *m, void *v)
5268 {
5269 struct tracer_opt *trace_opts;
5270 struct trace_array *tr = m->private;
5271 u32 tracer_flags;
5272 int i;
5273
5274 mutex_lock(&trace_types_lock);
5275 tracer_flags = tr->current_trace->flags->val;
5276 trace_opts = tr->current_trace->flags->opts;
5277
5278 for (i = 0; trace_options[i]; i++) {
5279 if (tr->trace_flags & (1 << i))
5280 seq_printf(m, "%s\n", trace_options[i]);
5281 else
5282 seq_printf(m, "no%s\n", trace_options[i]);
5283 }
5284
5285 for (i = 0; trace_opts[i].name; i++) {
5286 if (tracer_flags & trace_opts[i].bit)
5287 seq_printf(m, "%s\n", trace_opts[i].name);
5288 else
5289 seq_printf(m, "no%s\n", trace_opts[i].name);
5290 }
5291 mutex_unlock(&trace_types_lock);
5292
5293 return 0;
5294 }
5295
5296 static int __set_tracer_option(struct trace_array *tr,
5297 struct tracer_flags *tracer_flags,
5298 struct tracer_opt *opts, int neg)
5299 {
5300 struct tracer *trace = tracer_flags->trace;
5301 int ret;
5302
5303 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5304 if (ret)
5305 return ret;
5306
5307 if (neg)
5308 tracer_flags->val &= ~opts->bit;
5309 else
5310 tracer_flags->val |= opts->bit;
5311 return 0;
5312 }
5313
5314 /* Try to assign a tracer specific option */
5315 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5316 {
5317 struct tracer *trace = tr->current_trace;
5318 struct tracer_flags *tracer_flags = trace->flags;
5319 struct tracer_opt *opts = NULL;
5320 int i;
5321
5322 for (i = 0; tracer_flags->opts[i].name; i++) {
5323 opts = &tracer_flags->opts[i];
5324
5325 if (strcmp(cmp, opts->name) == 0)
5326 return __set_tracer_option(tr, trace->flags, opts, neg);
5327 }
5328
5329 return -EINVAL;
5330 }
5331
5332 /* Some tracers require overwrite to stay enabled */
5333 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5334 {
5335 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5336 return -1;
5337
5338 return 0;
5339 }
5340
5341 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5342 {
5343 int *map;
5344
5345 if ((mask == TRACE_ITER_RECORD_TGID) ||
5346 (mask == TRACE_ITER_RECORD_CMD))
5347 lockdep_assert_held(&event_mutex);
5348
5349 /* do nothing if flag is already set */
5350 if (!!(tr->trace_flags & mask) == !!enabled)
5351 return 0;
5352
5353 /* Give the tracer a chance to approve the change */
5354 if (tr->current_trace->flag_changed)
5355 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5356 return -EINVAL;
5357
5358 if (enabled)
5359 tr->trace_flags |= mask;
5360 else
5361 tr->trace_flags &= ~mask;
5362
5363 if (mask == TRACE_ITER_RECORD_CMD)
5364 trace_event_enable_cmd_record(enabled);
5365
5366 if (mask == TRACE_ITER_RECORD_TGID) {
5367 if (!tgid_map) {
5368 tgid_map_max = pid_max;
5369 map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5370 GFP_KERNEL);
5371
5372 /*
5373 * Pairs with smp_load_acquire() in
5374 * trace_find_tgid_ptr() to ensure that if it observes
5375 * the tgid_map we just allocated then it also observes
5376 * the corresponding tgid_map_max value.
5377 */
5378 smp_store_release(&tgid_map, map);
5379 }
5380 if (!tgid_map) {
5381 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5382 return -ENOMEM;
5383 }
5384
5385 trace_event_enable_tgid_record(enabled);
5386 }
5387
5388 if (mask == TRACE_ITER_EVENT_FORK)
5389 trace_event_follow_fork(tr, enabled);
5390
5391 if (mask == TRACE_ITER_FUNC_FORK)
5392 ftrace_pid_follow_fork(tr, enabled);
5393
5394 if (mask == TRACE_ITER_OVERWRITE) {
5395 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5396 #ifdef CONFIG_TRACER_MAX_TRACE
5397 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5398 #endif
5399 }
5400
5401 if (mask == TRACE_ITER_PRINTK) {
5402 trace_printk_start_stop_comm(enabled);
5403 trace_printk_control(enabled);
5404 }
5405
5406 return 0;
5407 }
5408
5409 int trace_set_options(struct trace_array *tr, char *option)
5410 {
5411 char *cmp;
5412 int neg = 0;
5413 int ret;
5414 size_t orig_len = strlen(option);
5415 int len;
5416
5417 cmp = strstrip(option);
5418
5419 len = str_has_prefix(cmp, "no");
5420 if (len)
5421 neg = 1;
5422
5423 cmp += len;
5424
5425 mutex_lock(&event_mutex);
5426 mutex_lock(&trace_types_lock);
5427
5428 ret = match_string(trace_options, -1, cmp);
5429 /* If no option could be set, test the specific tracer options */
5430 if (ret < 0)
5431 ret = set_tracer_option(tr, cmp, neg);
5432 else
5433 ret = set_tracer_flag(tr, 1 << ret, !neg);
5434
5435 mutex_unlock(&trace_types_lock);
5436 mutex_unlock(&event_mutex);
5437
5438 /*
5439 * If the first trailing whitespace is replaced with '\0' by strstrip,
5440 * turn it back into a space.
5441 */
5442 if (orig_len > strlen(option))
5443 option[strlen(option)] = ' ';
5444
5445 return ret;
5446 }
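/*
 * For example, writing "noirq-info" through the trace_options file clears
 * TRACE_ITER_IRQ_INFO, while a name that is not a core option (such as the
 * function tracer's "func_stack_trace") falls through to
 * set_tracer_option() above.
 */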
5447
5448 static void __init apply_trace_boot_options(void)
5449 {
5450 char *buf = trace_boot_options_buf;
5451 char *option;
5452
5453 while (true) {
5454 option = strsep(&buf, ",");
5455
5456 if (!option)
5457 break;
5458
5459 if (*option)
5460 trace_set_options(&global_trace, option);
5461
5462 /* Put back the comma to allow this to be called again */
5463 if (buf)
5464 *(buf - 1) = ',';
5465 }
5466 }
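/*
 * This consumes the buffer filled from the "trace_options=" boot parameter,
 * so booting with e.g. trace_options=sym-offset,noprint-parent applies
 * those options to the global trace before user space comes up.
 */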
5467
5468 static ssize_t
5469 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5470 size_t cnt, loff_t *ppos)
5471 {
5472 struct seq_file *m = filp->private_data;
5473 struct trace_array *tr = m->private;
5474 char buf[64];
5475 int ret;
5476
5477 if (cnt >= sizeof(buf))
5478 return -EINVAL;
5479
5480 if (copy_from_user(buf, ubuf, cnt))
5481 return -EFAULT;
5482
5483 buf[cnt] = 0;
5484
5485 ret = trace_set_options(tr, buf);
5486 if (ret < 0)
5487 return ret;
5488
5489 *ppos += cnt;
5490
5491 return cnt;
5492 }
5493
5494 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5495 {
5496 struct trace_array *tr = inode->i_private;
5497 int ret;
5498
5499 ret = tracing_check_open_get_tr(tr);
5500 if (ret)
5501 return ret;
5502
5503 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5504 if (ret < 0)
5505 trace_array_put(tr);
5506
5507 return ret;
5508 }
5509
5510 static const struct file_operations tracing_iter_fops = {
5511 .open = tracing_trace_options_open,
5512 .read = seq_read,
5513 .llseek = seq_lseek,
5514 .release = tracing_single_release_tr,
5515 .write = tracing_trace_options_write,
5516 };
5517
5518 static const char readme_msg[] =
5519 "tracing mini-HOWTO:\n\n"
5520 "# echo 0 > tracing_on : quick way to disable tracing\n"
5521 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5522 " Important files:\n"
5523 " trace\t\t\t- The static contents of the buffer\n"
5524 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5525 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5526 " current_tracer\t- function and latency tracers\n"
5527 " available_tracers\t- list of configured tracers for current_tracer\n"
5528 " error_log\t- error log for failed commands (that support it)\n"
5529 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5530 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5531 " trace_clock\t\t-change the clock used to order events\n"
5532 " local: Per cpu clock but may not be synced across CPUs\n"
5533 " global: Synced across CPUs but slows tracing down.\n"
5534 " counter: Not a clock, but just an increment\n"
5535 " uptime: Jiffy counter from time of boot\n"
5536 " perf: Same clock that perf events use\n"
5537 #ifdef CONFIG_X86_64
5538 " x86-tsc: TSC cycle counter\n"
5539 #endif
5540 "\n timestamp_mode\t-view the mode used to timestamp events\n"
5541 " delta: Delta difference against a buffer-wide timestamp\n"
5542 " absolute: Absolute (standalone) timestamp\n"
5543 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5544 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5545 " tracing_cpumask\t- Limit which CPUs to trace\n"
5546 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5547 "\t\t\t Remove sub-buffer with rmdir\n"
5548 " trace_options\t\t- Set format or modify how tracing happens\n"
5549 "\t\t\t Disable an option by prefixing 'no' to the\n"
5550 "\t\t\t option name\n"
5551 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5552 #ifdef CONFIG_DYNAMIC_FTRACE
5553 "\n available_filter_functions - list of functions that can be filtered on\n"
5554 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5555 "\t\t\t functions\n"
5556 "\t accepts: func_full_name or glob-matching-pattern\n"
5557 "\t modules: Can select a group via module\n"
5558 "\t Format: :mod:<module-name>\n"
5559 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5560 "\t triggers: a command to perform when function is hit\n"
5561 "\t Format: <function>:<trigger>[:count]\n"
5562 "\t trigger: traceon, traceoff\n"
5563 "\t\t enable_event:<system>:<event>\n"
5564 "\t\t disable_event:<system>:<event>\n"
5565 #ifdef CONFIG_STACKTRACE
5566 "\t\t stacktrace\n"
5567 #endif
5568 #ifdef CONFIG_TRACER_SNAPSHOT
5569 "\t\t snapshot\n"
5570 #endif
5571 "\t\t dump\n"
5572 "\t\t cpudump\n"
5573 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5574 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5575 "\t The first one will disable tracing every time do_fault is hit\n"
5576 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5577 "\t The first time do trap is hit and it disables tracing, the\n"
5578 "\t counter will decrement to 2. If tracing is already disabled,\n"
5579 "\t the counter will not decrement. It only decrements when the\n"
5580 "\t trigger did work\n"
5581 "\t To remove trigger without count:\n"
5582 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
5583 "\t To remove trigger with a count:\n"
5584 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5585 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5586 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5587 "\t modules: Can select a group via module command :mod:\n"
5588 "\t Does not accept triggers\n"
5589 #endif /* CONFIG_DYNAMIC_FTRACE */
5590 #ifdef CONFIG_FUNCTION_TRACER
5591 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5592 "\t\t (function)\n"
5593 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5594 "\t\t (function)\n"
5595 #endif
5596 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5597 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5598 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5599 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5600 #endif
5601 #ifdef CONFIG_TRACER_SNAPSHOT
5602 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5603 "\t\t\t snapshot buffer. Read the contents for more\n"
5604 "\t\t\t information\n"
5605 #endif
5606 #ifdef CONFIG_STACK_TRACER
5607 " stack_trace\t\t- Shows the max stack trace when active\n"
5608 " stack_max_size\t- Shows current max stack size that was traced\n"
5609 "\t\t\t Write into this file to reset the max size (trigger a\n"
5610 "\t\t\t new trace)\n"
5611 #ifdef CONFIG_DYNAMIC_FTRACE
5612 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5613 "\t\t\t traces\n"
5614 #endif
5615 #endif /* CONFIG_STACK_TRACER */
5616 #ifdef CONFIG_DYNAMIC_EVENTS
5617 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5618 "\t\t\t Write into this file to define/undefine new trace events.\n"
5619 #endif
5620 #ifdef CONFIG_KPROBE_EVENTS
5621 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5622 "\t\t\t Write into this file to define/undefine new trace events.\n"
5623 #endif
5624 #ifdef CONFIG_UPROBE_EVENTS
5625 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5626 "\t\t\t Write into this file to define/undefine new trace events.\n"
5627 #endif
5628 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5629 "\t accepts: event-definitions (one definition per line)\n"
5630 "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
5631 "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5632 #ifdef CONFIG_HIST_TRIGGERS
5633 "\t s:[synthetic/]<event> <field> [<field>]\n"
5634 #endif
5635 "\t e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5636 "\t -:[<group>/]<event>\n"
5637 #ifdef CONFIG_KPROBE_EVENTS
5638 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5639 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5640 #endif
5641 #ifdef CONFIG_UPROBE_EVENTS
5642 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5643 #endif
5644 "\t args: <name>=fetcharg[:type]\n"
5645 "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5646 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5647 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5648 #else
5649 "\t $stack<index>, $stack, $retval, $comm,\n"
5650 #endif
5651 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5652 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5653 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5654 "\t symstr, <type>\\[<array-size>\\]\n"
5655 #ifdef CONFIG_HIST_TRIGGERS
5656 "\t field: <stype> <name>;\n"
5657 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5658 "\t [unsigned] char/int/long\n"
5659 #endif
5660 "\t efield: For event probes ('e' types), the field is on of the fields\n"
5661 "\t of the <attached-group>/<attached-event>.\n"
5662 #endif
5663 " events/\t\t- Directory containing all trace event subsystems:\n"
5664 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5665 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5666 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5667 "\t\t\t events\n"
5668 " filter\t\t- If set, only events passing filter are traced\n"
5669 " events/<system>/<event>/\t- Directory containing control files for\n"
5670 "\t\t\t <event>:\n"
5671 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5672 " filter\t\t- If set, only events passing filter are traced\n"
5673 " trigger\t\t- If set, a command to perform when event is hit\n"
5674 "\t Format: <trigger>[:count][if <filter>]\n"
5675 "\t trigger: traceon, traceoff\n"
5676 "\t enable_event:<system>:<event>\n"
5677 "\t disable_event:<system>:<event>\n"
5678 #ifdef CONFIG_HIST_TRIGGERS
5679 "\t enable_hist:<system>:<event>\n"
5680 "\t disable_hist:<system>:<event>\n"
5681 #endif
5682 #ifdef CONFIG_STACKTRACE
5683 "\t\t stacktrace\n"
5684 #endif
5685 #ifdef CONFIG_TRACER_SNAPSHOT
5686 "\t\t snapshot\n"
5687 #endif
5688 #ifdef CONFIG_HIST_TRIGGERS
5689 "\t\t hist (see below)\n"
5690 #endif
5691 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5692 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5693 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5694 "\t events/block/block_unplug/trigger\n"
5695 "\t The first disables tracing every time block_unplug is hit.\n"
5696 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5697 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5698 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5699 "\t Like function triggers, the counter is only decremented if it\n"
5700 "\t enabled or disabled tracing.\n"
5701 "\t To remove a trigger without a count:\n"
5702 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5703 "\t To remove a trigger with a count:\n"
5704 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5705 "\t Filters can be ignored when removing a trigger.\n"
5706 #ifdef CONFIG_HIST_TRIGGERS
5707 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5708 "\t Format: hist:keys=<field1[,field2,...]>\n"
5709 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5710 "\t [:values=<field1[,field2,...]>]\n"
5711 "\t [:sort=<field1[,field2,...]>]\n"
5712 "\t [:size=#entries]\n"
5713 "\t [:pause][:continue][:clear]\n"
5714 "\t [:name=histname1]\n"
5715 "\t [:<handler>.<action>]\n"
5716 "\t [if <filter>]\n\n"
5717 "\t Note, special fields can be used as well:\n"
5718 "\t common_timestamp - to record current timestamp\n"
5719 "\t common_cpu - to record the CPU the event happened on\n"
5720 "\n"
5721 "\t A hist trigger variable can be:\n"
5722 "\t - a reference to a field e.g. x=current_timestamp,\n"
5723 "\t - a reference to another variable e.g. y=$x,\n"
5724 "\t - a numeric literal: e.g. ms_per_sec=1000,\n"
5725 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5726 "\n"
5727 "\t hist trigger aritmethic expressions support addition(+), subtraction(-),\n"
5728 "\t multiplication(*) and division(/) operators. An operand can be either a\n"
5729 "\t variable reference, field or numeric literal.\n"
5730 "\n"
5731 "\t When a matching event is hit, an entry is added to a hash\n"
5732 "\t table using the key(s) and value(s) named, and the value of a\n"
5733 "\t sum called 'hitcount' is incremented. Keys and values\n"
5734 "\t correspond to fields in the event's format description. Keys\n"
5735 "\t can be any field, or the special string 'stacktrace'.\n"
5736 "\t Compound keys consisting of up to two fields can be specified\n"
5737 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5738 "\t fields. Sort keys consisting of up to two fields can be\n"
5739 "\t specified using the 'sort' keyword. The sort direction can\n"
5740 "\t be modified by appending '.descending' or '.ascending' to a\n"
5741 "\t sort field. The 'size' parameter can be used to specify more\n"
5742 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5743 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5744 "\t its histogram data will be shared with other triggers of the\n"
5745 "\t same name, and trigger hits will update this common data.\n\n"
5746 "\t Reading the 'hist' file for the event will dump the hash\n"
5747 "\t table in its entirety to stdout. If there are multiple hist\n"
5748 "\t triggers attached to an event, there will be a table for each\n"
5749 "\t trigger in the output. The table displayed for a named\n"
5750 "\t trigger will be the same as any other instance having the\n"
5751 "\t same name. The default format used to display a given field\n"
5752 "\t can be modified by appending any of the following modifiers\n"
5753 "\t to the field name, as applicable:\n\n"
5754 "\t .hex display a number as a hex value\n"
5755 "\t .sym display an address as a symbol\n"
5756 "\t .sym-offset display an address as a symbol and offset\n"
5757 "\t .execname display a common_pid as a program name\n"
5758 "\t .syscall display a syscall id as a syscall name\n"
5759 "\t .log2 display log2 value rather than raw number\n"
5760 "\t .buckets=size display values in groups of size rather than raw number\n"
5761 "\t .usecs display a common_timestamp in microseconds\n\n"
5762 "\t The 'pause' parameter can be used to pause an existing hist\n"
5763 "\t trigger or to start a hist trigger but not log any events\n"
5764 "\t until told to do so. 'continue' can be used to start or\n"
5765 "\t restart a paused hist trigger.\n\n"
5766 "\t The 'clear' parameter will clear the contents of a running\n"
5767 "\t hist trigger and leave its current paused/active state\n"
5768 "\t unchanged.\n\n"
5769 "\t The enable_hist and disable_hist triggers can be used to\n"
5770 "\t have one event conditionally start and stop another event's\n"
5771 "\t already-attached hist trigger. The syntax is analogous to\n"
5772 "\t the enable_event and disable_event triggers.\n\n"
5773 "\t Hist trigger handlers and actions are executed whenever a\n"
5774 "\t a histogram entry is added or updated. They take the form:\n\n"
5775 "\t <handler>.<action>\n\n"
5776 "\t The available handlers are:\n\n"
5777 "\t onmatch(matching.event) - invoke on addition or update\n"
5778 "\t onmax(var) - invoke if var exceeds current max\n"
5779 "\t onchange(var) - invoke action if var changes\n\n"
5780 "\t The available actions are:\n\n"
5781 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5782 "\t save(field,...) - save current event fields\n"
5783 #ifdef CONFIG_TRACER_SNAPSHOT
5784 "\t snapshot() - snapshot the trace buffer\n\n"
5785 #endif
5786 #ifdef CONFIG_SYNTH_EVENTS
5787 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5788 "\t Write into this file to define/undefine new synthetic events.\n"
5789 "\t example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5790 #endif
5791 #endif
5792 ;
5793
5794 static ssize_t
5795 tracing_readme_read(struct file *filp, char __user *ubuf,
5796 size_t cnt, loff_t *ppos)
5797 {
5798 return simple_read_from_buffer(ubuf, cnt, ppos,
5799 readme_msg, strlen(readme_msg));
5800 }
5801
5802 static const struct file_operations tracing_readme_fops = {
5803 .open = tracing_open_generic,
5804 .read = tracing_readme_read,
5805 .llseek = generic_file_llseek,
5806 };
5807
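/*
 * Rough userspace illustration of the dynamic event syntax documented in
 * the readme text above.  The tracefs paths are the conventional
 * locations, "kprobes" is the default group for kprobe events, and
 * do_sys_openat2 is only an example symbol; all of these are assumptions
 * of this sketch, not requirements.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int add_and_enable_kprobe(void)
 *	{
 *		const char *def = "p:myprobe do_sys_openat2\n";
 *		int fd;
 *
 *		fd = open("/sys/kernel/tracing/kprobe_events", O_WRONLY | O_APPEND);
 *		if (fd < 0)
 *			return -1;
 *		if (write(fd, def, strlen(def)) < 0) {
 *			close(fd);
 *			return -1;
 *		}
 *		close(fd);
 *
 *		fd = open("/sys/kernel/tracing/events/kprobes/myprobe/enable", O_WRONLY);
 *		if (fd < 0)
 *			return -1;
 *		if (write(fd, "1", 1) < 0) {
 *			close(fd);
 *			return -1;
 *		}
 *		return close(fd);
 *	}
 */
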
5808 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5809 {
5810 int pid = ++(*pos);
5811
5812 return trace_find_tgid_ptr(pid);
5813 }
5814
5815 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5816 {
5817 int pid = *pos;
5818
5819 return trace_find_tgid_ptr(pid);
5820 }
5821
5822 static void saved_tgids_stop(struct seq_file *m, void *v)
5823 {
5824 }
5825
5826 static int saved_tgids_show(struct seq_file *m, void *v)
5827 {
5828 int *entry = (int *)v;
5829 int pid = entry - tgid_map;
5830 int tgid = *entry;
5831
5832 if (tgid == 0)
5833 return SEQ_SKIP;
5834
5835 seq_printf(m, "%d %d\n", pid, tgid);
5836 return 0;
5837 }
5838
5839 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5840 .start = saved_tgids_start,
5841 .stop = saved_tgids_stop,
5842 .next = saved_tgids_next,
5843 .show = saved_tgids_show,
5844 };
5845
5846 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5847 {
5848 int ret;
5849
5850 ret = tracing_check_open_get_tr(NULL);
5851 if (ret)
5852 return ret;
5853
5854 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5855 }
5856
5857
5858 static const struct file_operations tracing_saved_tgids_fops = {
5859 .open = tracing_saved_tgids_open,
5860 .read = seq_read,
5861 .llseek = seq_lseek,
5862 .release = seq_release,
5863 };
5864
5865 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5866 {
5867 unsigned int *ptr = v;
5868
5869 if (*pos || m->count)
5870 ptr++;
5871
5872 (*pos)++;
5873
5874 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5875 ptr++) {
5876 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5877 continue;
5878
5879 return ptr;
5880 }
5881
5882 return NULL;
5883 }
5884
5885 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5886 {
5887 void *v;
5888 loff_t l = 0;
5889
5890 preempt_disable();
5891 arch_spin_lock(&trace_cmdline_lock);
5892
5893 v = &savedcmd->map_cmdline_to_pid[0];
5894 while (l <= *pos) {
5895 v = saved_cmdlines_next(m, v, &l);
5896 if (!v)
5897 return NULL;
5898 }
5899
5900 return v;
5901 }
5902
5903 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5904 {
5905 arch_spin_unlock(&trace_cmdline_lock);
5906 preempt_enable();
5907 }
5908
5909 static int saved_cmdlines_show(struct seq_file *m, void *v)
5910 {
5911 char buf[TASK_COMM_LEN];
5912 unsigned int *pid = v;
5913
5914 __trace_find_cmdline(*pid, buf);
5915 seq_printf(m, "%d %s\n", *pid, buf);
5916 return 0;
5917 }
5918
5919 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5920 .start = saved_cmdlines_start,
5921 .next = saved_cmdlines_next,
5922 .stop = saved_cmdlines_stop,
5923 .show = saved_cmdlines_show,
5924 };
5925
5926 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5927 {
5928 int ret;
5929
5930 ret = tracing_check_open_get_tr(NULL);
5931 if (ret)
5932 return ret;
5933
5934 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5935 }
5936
5937 static const struct file_operations tracing_saved_cmdlines_fops = {
5938 .open = tracing_saved_cmdlines_open,
5939 .read = seq_read,
5940 .llseek = seq_lseek,
5941 .release = seq_release,
5942 };
5943
5944 static ssize_t
5945 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5946 size_t cnt, loff_t *ppos)
5947 {
5948 char buf[64];
5949 int r;
5950
5951 preempt_disable();
5952 arch_spin_lock(&trace_cmdline_lock);
5953 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5954 arch_spin_unlock(&trace_cmdline_lock);
5955 preempt_enable();
5956
5957 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5958 }
5959
5960 static int tracing_resize_saved_cmdlines(unsigned int val)
5961 {
5962 struct saved_cmdlines_buffer *s, *savedcmd_temp;
5963
5964 s = allocate_cmdlines_buffer(val);
5965 if (!s)
5966 return -ENOMEM;
5967
5968 preempt_disable();
5969 arch_spin_lock(&trace_cmdline_lock);
5970 savedcmd_temp = savedcmd;
5971 savedcmd = s;
5972 arch_spin_unlock(&trace_cmdline_lock);
5973 preempt_enable();
5974 free_saved_cmdlines_buffer(savedcmd_temp);
5975
5976 return 0;
5977 }
5978
5979 static ssize_t
5980 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5981 size_t cnt, loff_t *ppos)
5982 {
5983 unsigned long val;
5984 int ret;
5985
5986 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5987 if (ret)
5988 return ret;
5989
5990 /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5991 if (!val || val > PID_MAX_DEFAULT)
5992 return -EINVAL;
5993
5994 ret = tracing_resize_saved_cmdlines((unsigned int)val);
5995 if (ret < 0)
5996 return ret;
5997
5998 *ppos += cnt;
5999
6000 return cnt;
6001 }
6002
6003 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6004 .open = tracing_open_generic,
6005 .read = tracing_saved_cmdlines_size_read,
6006 .write = tracing_saved_cmdlines_size_write,
6007 };
6008
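/*
 * Usage note (illustrative): the file takes a plain decimal entry count,
 * e.g. "echo 1024 > saved_cmdlines_size" caches up to 1024 task comms,
 * and the write handler above rejects values outside 1..PID_MAX_DEFAULT
 * with -EINVAL.
 */
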
6009 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6010 static union trace_eval_map_item *
6011 update_eval_map(union trace_eval_map_item *ptr)
6012 {
6013 if (!ptr->map.eval_string) {
6014 if (ptr->tail.next) {
6015 ptr = ptr->tail.next;
6016 /* Set ptr to the next real item (skip head) */
6017 ptr++;
6018 } else
6019 return NULL;
6020 }
6021 return ptr;
6022 }
6023
6024 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6025 {
6026 union trace_eval_map_item *ptr = v;
6027
6028 /*
6029 * Paranoid! If ptr points to end, we don't want to increment past it.
6030 * This really should never happen.
6031 */
6032 (*pos)++;
6033 ptr = update_eval_map(ptr);
6034 if (WARN_ON_ONCE(!ptr))
6035 return NULL;
6036
6037 ptr++;
6038 ptr = update_eval_map(ptr);
6039
6040 return ptr;
6041 }
6042
6043 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6044 {
6045 union trace_eval_map_item *v;
6046 loff_t l = 0;
6047
6048 mutex_lock(&trace_eval_mutex);
6049
6050 v = trace_eval_maps;
6051 if (v)
6052 v++;
6053
6054 while (v && l < *pos) {
6055 v = eval_map_next(m, v, &l);
6056 }
6057
6058 return v;
6059 }
6060
6061 static void eval_map_stop(struct seq_file *m, void *v)
6062 {
6063 mutex_unlock(&trace_eval_mutex);
6064 }
6065
6066 static int eval_map_show(struct seq_file *m, void *v)
6067 {
6068 union trace_eval_map_item *ptr = v;
6069
6070 seq_printf(m, "%s %ld (%s)\n",
6071 ptr->map.eval_string, ptr->map.eval_value,
6072 ptr->map.system);
6073
6074 return 0;
6075 }
6076
6077 static const struct seq_operations tracing_eval_map_seq_ops = {
6078 .start = eval_map_start,
6079 .next = eval_map_next,
6080 .stop = eval_map_stop,
6081 .show = eval_map_show,
6082 };
6083
6084 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6085 {
6086 int ret;
6087
6088 ret = tracing_check_open_get_tr(NULL);
6089 if (ret)
6090 return ret;
6091
6092 return seq_open(filp, &tracing_eval_map_seq_ops);
6093 }
6094
6095 static const struct file_operations tracing_eval_map_fops = {
6096 .open = tracing_eval_map_open,
6097 .read = seq_read,
6098 .llseek = seq_lseek,
6099 .release = seq_release,
6100 };
6101
6102 static inline union trace_eval_map_item *
6103 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6104 {
6105 /* Return tail of array given the head */
6106 return ptr + ptr->head.length + 1;
6107 }
6108
6109 static void
6110 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6111 int len)
6112 {
6113 struct trace_eval_map **stop;
6114 struct trace_eval_map **map;
6115 union trace_eval_map_item *map_array;
6116 union trace_eval_map_item *ptr;
6117
6118 stop = start + len;
6119
6120 /*
6121 * The trace_eval_maps contains the map plus a head and tail item,
6122 * where the head holds the module and length of array, and the
6123 * tail holds a pointer to the next list.
6124 */
6125 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6126 if (!map_array) {
6127 pr_warn("Unable to allocate trace eval mapping\n");
6128 return;
6129 }
6130
6131 mutex_lock(&trace_eval_mutex);
6132
6133 if (!trace_eval_maps)
6134 trace_eval_maps = map_array;
6135 else {
6136 ptr = trace_eval_maps;
6137 for (;;) {
6138 ptr = trace_eval_jmp_to_tail(ptr);
6139 if (!ptr->tail.next)
6140 break;
6141 ptr = ptr->tail.next;
6142
6143 }
6144 ptr->tail.next = map_array;
6145 }
6146 map_array->head.mod = mod;
6147 map_array->head.length = len;
6148 map_array++;
6149
6150 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6151 map_array->map = **map;
6152 map_array++;
6153 }
6154 memset(map_array, 0, sizeof(*map_array));
6155
6156 mutex_unlock(&trace_eval_mutex);
6157 }
6158
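/*
 * Layout sketch of one block appended by trace_insert_eval_map_file()
 * above (len + 2 items per module):
 *
 *	map_array[0]		head: { .mod, .length = len }
 *	map_array[1..len]	map:  one trace_eval_map per entry
 *	map_array[len + 1]	tail: { .next -> next module's block or NULL }
 *
 * trace_eval_jmp_to_tail() relies on this to hop from a head to its tail,
 * and eval_map_start()/eval_map_next() skip each head so that only real
 * map entries are shown in the eval_map file.
 */
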
6159 static void trace_create_eval_file(struct dentry *d_tracer)
6160 {
6161 trace_create_file("eval_map", 0444, d_tracer,
6162 NULL, &tracing_eval_map_fops);
6163 }
6164
6165 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6166 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6167 static inline void trace_insert_eval_map_file(struct module *mod,
6168 struct trace_eval_map **start, int len) { }
6169 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6170
6171 static void trace_insert_eval_map(struct module *mod,
6172 struct trace_eval_map **start, int len)
6173 {
6174 struct trace_eval_map **map;
6175
6176 if (len <= 0)
6177 return;
6178
6179 map = start;
6180
6181 trace_event_eval_update(map, len);
6182
6183 trace_insert_eval_map_file(mod, start, len);
6184 }
6185
6186 static ssize_t
6187 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6188 size_t cnt, loff_t *ppos)
6189 {
6190 struct trace_array *tr = filp->private_data;
6191 char buf[MAX_TRACER_SIZE+2];
6192 int r;
6193
6194 mutex_lock(&trace_types_lock);
6195 r = sprintf(buf, "%s\n", tr->current_trace->name);
6196 mutex_unlock(&trace_types_lock);
6197
6198 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6199 }
6200
6201 int tracer_init(struct tracer *t, struct trace_array *tr)
6202 {
6203 tracing_reset_online_cpus(&tr->array_buffer);
6204 return t->init(tr);
6205 }
6206
6207 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6208 {
6209 int cpu;
6210
6211 for_each_tracing_cpu(cpu)
6212 per_cpu_ptr(buf->data, cpu)->entries = val;
6213 }
6214
6215 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6216 {
6217 if (cpu == RING_BUFFER_ALL_CPUS) {
6218 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6219 } else {
6220 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6221 }
6222 }
6223
6224 #ifdef CONFIG_TRACER_MAX_TRACE
6225 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6226 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6227 struct array_buffer *size_buf, int cpu_id)
6228 {
6229 int cpu, ret = 0;
6230
6231 if (cpu_id == RING_BUFFER_ALL_CPUS) {
6232 for_each_tracing_cpu(cpu) {
6233 ret = ring_buffer_resize(trace_buf->buffer,
6234 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6235 if (ret < 0)
6236 break;
6237 per_cpu_ptr(trace_buf->data, cpu)->entries =
6238 per_cpu_ptr(size_buf->data, cpu)->entries;
6239 }
6240 } else {
6241 ret = ring_buffer_resize(trace_buf->buffer,
6242 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6243 if (ret == 0)
6244 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6245 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6246 }
6247
6248 return ret;
6249 }
6250 #endif /* CONFIG_TRACER_MAX_TRACE */
6251
6252 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6253 unsigned long size, int cpu)
6254 {
6255 int ret;
6256
6257 /*
6258 * If kernel or user changes the size of the ring buffer
6259 * we use the size that was given, and we can forget about
6260 * expanding it later.
6261 */
6262 ring_buffer_expanded = true;
6263
6264 /* May be called before buffers are initialized */
6265 if (!tr->array_buffer.buffer)
6266 return 0;
6267
6268 /* Do not allow tracing while resizing ring buffer */
6269 tracing_stop_tr(tr);
6270
6271 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6272 if (ret < 0)
6273 goto out_start;
6274
6275 #ifdef CONFIG_TRACER_MAX_TRACE
6276 if (!tr->allocated_snapshot)
6277 goto out;
6278
6279 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6280 if (ret < 0) {
6281 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6282 &tr->array_buffer, cpu);
6283 if (r < 0) {
6284 /*
6285 * AARGH! We are left with different
6286 * size max buffer!!!!
6287 * The max buffer is our "snapshot" buffer.
6288 * When a tracer needs a snapshot (one of the
6289 * latency tracers), it swaps the max buffer
6290 * with the saved snapshot. We succeeded in
6291 * updating the size of the main buffer, but failed to
6292 * update the size of the max buffer. But when we tried
6293 * to reset the main buffer to the original size, we
6294 * failed there too. This is very unlikely to
6295 * happen, but if it does, warn and kill all
6296 * tracing.
6297 */
6298 WARN_ON(1);
6299 tracing_disabled = 1;
6300 }
6301 goto out_start;
6302 }
6303
6304 update_buffer_entries(&tr->max_buffer, cpu);
6305
6306 out:
6307 #endif /* CONFIG_TRACER_MAX_TRACE */
6308
6309 update_buffer_entries(&tr->array_buffer, cpu);
6310 out_start:
6311 tracing_start_tr(tr);
6312 return ret;
6313 }
6314
6315 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6316 unsigned long size, int cpu_id)
6317 {
6318 int ret;
6319
6320 mutex_lock(&trace_types_lock);
6321
6322 if (cpu_id != RING_BUFFER_ALL_CPUS) {
6323 /* make sure this CPU is enabled in the mask */
6324 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6325 ret = -EINVAL;
6326 goto out;
6327 }
6328 }
6329
6330 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6331 if (ret < 0)
6332 ret = -ENOMEM;
6333
6334 out:
6335 mutex_unlock(&trace_types_lock);
6336
6337 return ret;
6338 }
6339
6340
6341 /**
6342 * tracing_update_buffers - used by tracing facility to expand ring buffers
6343 *
6344 * To save memory when tracing is never used on a system that has it
6345 * configured in, the ring buffers are set to a minimum size. But once
6346 * a user starts to use the tracing facility, they need to grow
6347 * to their default size.
6348 *
6349 * This function is to be called when a tracer is about to be used.
6350 */
6351 int tracing_update_buffers(void)
6352 {
6353 int ret = 0;
6354
6355 mutex_lock(&trace_types_lock);
6356 if (!ring_buffer_expanded)
6357 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6358 RING_BUFFER_ALL_CPUS);
6359 mutex_unlock(&trace_types_lock);
6360
6361 return ret;
6362 }
6363
6364 struct trace_option_dentry;
6365
6366 static void
6367 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6368
6369 /*
6370 * Used to clear out the tracer before deletion of an instance.
6371 * Must have trace_types_lock held.
6372 */
6373 static void tracing_set_nop(struct trace_array *tr)
6374 {
6375 if (tr->current_trace == &nop_trace)
6376 return;
6377
6378 tr->current_trace->enabled--;
6379
6380 if (tr->current_trace->reset)
6381 tr->current_trace->reset(tr);
6382
6383 tr->current_trace = &nop_trace;
6384 }
6385
6386 static bool tracer_options_updated;
6387
6388 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6389 {
6390 /* Only enable if the directory has been created already. */
6391 if (!tr->dir)
6392 return;
6393
6394 /* Only create trace option files after update_tracer_options finishes */
6395 if (!tracer_options_updated)
6396 return;
6397
6398 create_trace_option_files(tr, t);
6399 }
6400
6401 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6402 {
6403 struct tracer *t;
6404 #ifdef CONFIG_TRACER_MAX_TRACE
6405 bool had_max_tr;
6406 #endif
6407 int ret = 0;
6408
6409 mutex_lock(&trace_types_lock);
6410
6411 if (!ring_buffer_expanded) {
6412 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6413 RING_BUFFER_ALL_CPUS);
6414 if (ret < 0)
6415 goto out;
6416 ret = 0;
6417 }
6418
6419 for (t = trace_types; t; t = t->next) {
6420 if (strcmp(t->name, buf) == 0)
6421 break;
6422 }
6423 if (!t) {
6424 ret = -EINVAL;
6425 goto out;
6426 }
6427 if (t == tr->current_trace)
6428 goto out;
6429
6430 #ifdef CONFIG_TRACER_SNAPSHOT
6431 if (t->use_max_tr) {
6432 local_irq_disable();
6433 arch_spin_lock(&tr->max_lock);
6434 if (tr->cond_snapshot)
6435 ret = -EBUSY;
6436 arch_spin_unlock(&tr->max_lock);
6437 local_irq_enable();
6438 if (ret)
6439 goto out;
6440 }
6441 #endif
6442 /* Some tracers won't work on kernel command line */
6443 if (system_state < SYSTEM_RUNNING && t->noboot) {
6444 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6445 t->name);
6446 goto out;
6447 }
6448
6449 /* Some tracers are only allowed for the top level buffer */
6450 if (!trace_ok_for_array(t, tr)) {
6451 ret = -EINVAL;
6452 goto out;
6453 }
6454
6455 /* If trace pipe files are being read, we can't change the tracer */
6456 if (tr->trace_ref) {
6457 ret = -EBUSY;
6458 goto out;
6459 }
6460
6461 trace_branch_disable();
6462
6463 tr->current_trace->enabled--;
6464
6465 if (tr->current_trace->reset)
6466 tr->current_trace->reset(tr);
6467
6468 #ifdef CONFIG_TRACER_MAX_TRACE
6469 had_max_tr = tr->current_trace->use_max_tr;
6470
6471 /* Current trace needs to be nop_trace before synchronize_rcu */
6472 tr->current_trace = &nop_trace;
6473
6474 if (had_max_tr && !t->use_max_tr) {
6475 /*
6476 * We need to make sure that the update_max_tr sees that
6477 * current_trace changed to nop_trace to keep it from
6478 * swapping the buffers after we resize it.
6479 * The update_max_tr is called with interrupts disabled
6480 * so a synchronize_rcu() is sufficient.
6481 */
6482 synchronize_rcu();
6483 free_snapshot(tr);
6484 }
6485
6486 if (t->use_max_tr && !tr->allocated_snapshot) {
6487 ret = tracing_alloc_snapshot_instance(tr);
6488 if (ret < 0)
6489 goto out;
6490 }
6491 #else
6492 tr->current_trace = &nop_trace;
6493 #endif
6494
6495 if (t->init) {
6496 ret = tracer_init(t, tr);
6497 if (ret)
6498 goto out;
6499 }
6500
6501 tr->current_trace = t;
6502 tr->current_trace->enabled++;
6503 trace_branch_enable(tr);
6504 out:
6505 mutex_unlock(&trace_types_lock);
6506
6507 return ret;
6508 }
6509
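/*
 * Usage note (illustrative): "echo function > current_tracer" reaches
 * tracing_set_tracer() above and selects the function tracer, while
 * "echo nop > current_tracer" is the usual way to drop back to no tracer.
 */
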
6510 static ssize_t
6511 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6512 size_t cnt, loff_t *ppos)
6513 {
6514 struct trace_array *tr = filp->private_data;
6515 char buf[MAX_TRACER_SIZE+1];
6516 int i;
6517 size_t ret;
6518 int err;
6519
6520 ret = cnt;
6521
6522 if (cnt > MAX_TRACER_SIZE)
6523 cnt = MAX_TRACER_SIZE;
6524
6525 if (copy_from_user(buf, ubuf, cnt))
6526 return -EFAULT;
6527
6528 buf[cnt] = 0;
6529
6530 /* strip ending whitespace. */
6531 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6532 buf[i] = 0;
6533
6534 err = tracing_set_tracer(tr, buf);
6535 if (err)
6536 return err;
6537
6538 *ppos += ret;
6539
6540 return ret;
6541 }
6542
6543 static ssize_t
6544 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6545 size_t cnt, loff_t *ppos)
6546 {
6547 char buf[64];
6548 int r;
6549
6550 r = snprintf(buf, sizeof(buf), "%ld\n",
6551 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6552 if (r > sizeof(buf))
6553 r = sizeof(buf);
6554 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6555 }
6556
6557 static ssize_t
6558 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6559 size_t cnt, loff_t *ppos)
6560 {
6561 unsigned long val;
6562 int ret;
6563
6564 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6565 if (ret)
6566 return ret;
6567
6568 *ptr = val * 1000;
6569
6570 return cnt;
6571 }
6572
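/*
 * Note on units for the two helpers above: the files expose microseconds
 * while the stored value is in nanoseconds.  For example, writing "250"
 * stores 250 * 1000 = 250000 ns, and a later read converts back with
 * nsecs_to_usecs() and shows "250".  A stored value of (unsigned long)-1
 * is reported as "-1" rather than being converted.
 */
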
6573 static ssize_t
6574 tracing_thresh_read(struct file *filp, char __user *ubuf,
6575 size_t cnt, loff_t *ppos)
6576 {
6577 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6578 }
6579
6580 static ssize_t
6581 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6582 size_t cnt, loff_t *ppos)
6583 {
6584 struct trace_array *tr = filp->private_data;
6585 int ret;
6586
6587 mutex_lock(&trace_types_lock);
6588 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6589 if (ret < 0)
6590 goto out;
6591
6592 if (tr->current_trace->update_thresh) {
6593 ret = tr->current_trace->update_thresh(tr);
6594 if (ret < 0)
6595 goto out;
6596 }
6597
6598 ret = cnt;
6599 out:
6600 mutex_unlock(&trace_types_lock);
6601
6602 return ret;
6603 }
6604
6605 #ifdef CONFIG_TRACER_MAX_TRACE
6606
6607 static ssize_t
6608 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6609 size_t cnt, loff_t *ppos)
6610 {
6611 struct trace_array *tr = filp->private_data;
6612
6613 return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6614 }
6615
6616 static ssize_t
6617 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6618 size_t cnt, loff_t *ppos)
6619 {
6620 struct trace_array *tr = filp->private_data;
6621
6622 return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6623 }
6624
6625 #endif
6626
6627 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6628 {
6629 struct trace_array *tr = inode->i_private;
6630 struct trace_iterator *iter;
6631 int ret;
6632
6633 ret = tracing_check_open_get_tr(tr);
6634 if (ret)
6635 return ret;
6636
6637 mutex_lock(&trace_types_lock);
6638
6639 /* create a buffer to store the information to pass to userspace */
6640 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6641 if (!iter) {
6642 ret = -ENOMEM;
6643 __trace_array_put(tr);
6644 goto out;
6645 }
6646
6647 trace_seq_init(&iter->seq);
6648 iter->trace = tr->current_trace;
6649
6650 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6651 ret = -ENOMEM;
6652 goto fail;
6653 }
6654
6655 /* trace pipe does not show start of buffer */
6656 cpumask_setall(iter->started);
6657
6658 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6659 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6660
6661 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6662 if (trace_clocks[tr->clock_id].in_ns)
6663 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6664
6665 iter->tr = tr;
6666 iter->array_buffer = &tr->array_buffer;
6667 iter->cpu_file = tracing_get_cpu(inode);
6668 mutex_init(&iter->mutex);
6669 filp->private_data = iter;
6670
6671 if (iter->trace->pipe_open)
6672 iter->trace->pipe_open(iter);
6673
6674 nonseekable_open(inode, filp);
6675
6676 tr->trace_ref++;
6677 out:
6678 mutex_unlock(&trace_types_lock);
6679 return ret;
6680
6681 fail:
6682 kfree(iter);
6683 __trace_array_put(tr);
6684 mutex_unlock(&trace_types_lock);
6685 return ret;
6686 }
6687
6688 static int tracing_release_pipe(struct inode *inode, struct file *file)
6689 {
6690 struct trace_iterator *iter = file->private_data;
6691 struct trace_array *tr = inode->i_private;
6692
6693 mutex_lock(&trace_types_lock);
6694
6695 tr->trace_ref--;
6696
6697 if (iter->trace->pipe_close)
6698 iter->trace->pipe_close(iter);
6699
6700 mutex_unlock(&trace_types_lock);
6701
6702 free_cpumask_var(iter->started);
6703 kfree(iter->fmt);
6704 kfree(iter->temp);
6705 mutex_destroy(&iter->mutex);
6706 kfree(iter);
6707
6708 trace_array_put(tr);
6709
6710 return 0;
6711 }
6712
6713 static __poll_t
6714 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6715 {
6716 struct trace_array *tr = iter->tr;
6717
6718 /* Iterators are static, they should be filled or empty */
6719 if (trace_buffer_iter(iter, iter->cpu_file))
6720 return EPOLLIN | EPOLLRDNORM;
6721
6722 if (tr->trace_flags & TRACE_ITER_BLOCK)
6723 /*
6724 * Always select as readable when in blocking mode
6725 */
6726 return EPOLLIN | EPOLLRDNORM;
6727 else
6728 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6729 filp, poll_table, iter->tr->buffer_percent);
6730 }
6731
6732 static __poll_t
6733 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6734 {
6735 struct trace_iterator *iter = filp->private_data;
6736
6737 return trace_poll(iter, filp, poll_table);
6738 }
6739
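/*
 * Illustrative userspace sketch (tracefs path assumed to be the
 * conventional mount point): a consumer can poll() trace_pipe and read
 * once data is available, which exercises tracing_poll_pipe() above and
 * tracing_read_pipe() below.
 *
 *	#include <fcntl.h>
 *	#include <poll.h>
 *	#include <unistd.h>
 *
 *	static void drain_trace_pipe(void)
 *	{
 *		char buf[4096];
 *		struct pollfd pfd = {
 *			.fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY),
 *			.events = POLLIN,
 *		};
 *
 *		if (pfd.fd < 0)
 *			return;
 *		while (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
 *			ssize_t n = read(pfd.fd, buf, sizeof(buf));
 *
 *			if (n <= 0)
 *				break;
 *			write(STDOUT_FILENO, buf, n);	// consuming read
 *		}
 *		close(pfd.fd);
 *	}
 */
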
6740 /* Must be called with iter->mutex held. */
6741 static int tracing_wait_pipe(struct file *filp)
6742 {
6743 struct trace_iterator *iter = filp->private_data;
6744 int ret;
6745
6746 while (trace_empty(iter)) {
6747
6748 if ((filp->f_flags & O_NONBLOCK)) {
6749 return -EAGAIN;
6750 }
6751
6752 /*
6753 * We block until we read something and tracing is disabled.
6754 * We still block if tracing is disabled, but we have never
6755 * read anything. This allows a user to cat this file, and
6756 * then enable tracing. But after we have read something,
6757 * we give an EOF when tracing is again disabled.
6758 *
6759 * iter->pos will be 0 if we haven't read anything.
6760 */
6761 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6762 break;
6763
6764 mutex_unlock(&iter->mutex);
6765
6766 ret = wait_on_pipe(iter, 0);
6767
6768 mutex_lock(&iter->mutex);
6769
6770 if (ret)
6771 return ret;
6772 }
6773
6774 return 1;
6775 }
6776
6777 /*
6778 * Consumer reader.
6779 */
6780 static ssize_t
6781 tracing_read_pipe(struct file *filp, char __user *ubuf,
6782 size_t cnt, loff_t *ppos)
6783 {
6784 struct trace_iterator *iter = filp->private_data;
6785 ssize_t sret;
6786
6787 /*
6788 * Avoid more than one consumer on a single file descriptor.
6789 * This is just a matter of trace coherency, the ring buffer itself
6790 * is protected.
6791 */
6792 mutex_lock(&iter->mutex);
6793
6794 /* return any leftover data */
6795 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6796 if (sret != -EBUSY)
6797 goto out;
6798
6799 trace_seq_init(&iter->seq);
6800
6801 if (iter->trace->read) {
6802 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6803 if (sret)
6804 goto out;
6805 }
6806
6807 waitagain:
6808 sret = tracing_wait_pipe(filp);
6809 if (sret <= 0)
6810 goto out;
6811
6812 /* stop when tracing is finished */
6813 if (trace_empty(iter)) {
6814 sret = 0;
6815 goto out;
6816 }
6817
6818 if (cnt >= PAGE_SIZE)
6819 cnt = PAGE_SIZE - 1;
6820
6821 /* reset all but tr, trace, and overruns */
6822 memset(&iter->seq, 0,
6823 sizeof(struct trace_iterator) -
6824 offsetof(struct trace_iterator, seq));
6825 cpumask_clear(iter->started);
6826 trace_seq_init(&iter->seq);
6827 iter->pos = -1;
6828
6829 trace_event_read_lock();
6830 trace_access_lock(iter->cpu_file);
6831 while (trace_find_next_entry_inc(iter) != NULL) {
6832 enum print_line_t ret;
6833 int save_len = iter->seq.seq.len;
6834
6835 ret = print_trace_line(iter);
6836 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6837 /*
6838 * If one print_trace_line() fills the entire trace_seq in one shot,
6839 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6840 * In this case, we need to consume it, otherwise the loop will peek
6841 * this event next time, resulting in an infinite loop.
6842 */
6843 if (save_len == 0) {
6844 iter->seq.full = 0;
6845 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6846 trace_consume(iter);
6847 break;
6848 }
6849
6850 /* In other cases, don't print partial lines */
6851 iter->seq.seq.len = save_len;
6852 break;
6853 }
6854 if (ret != TRACE_TYPE_NO_CONSUME)
6855 trace_consume(iter);
6856
6857 if (trace_seq_used(&iter->seq) >= cnt)
6858 break;
6859
6860 /*
6861 * Setting the full flag means we reached the trace_seq buffer
6862 * size and we should have left via the partial output condition above.
6863 * If this is hit, one of the trace_seq_* functions is not used properly.
6864 */
6865 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6866 iter->ent->type);
6867 }
6868 trace_access_unlock(iter->cpu_file);
6869 trace_event_read_unlock();
6870
6871 /* Now copy what we have to the user */
6872 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6873 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6874 trace_seq_init(&iter->seq);
6875
6876 /*
6877 * If there was nothing to send to user, in spite of consuming trace
6878 * entries, go back to wait for more entries.
6879 */
6880 if (sret == -EBUSY)
6881 goto waitagain;
6882
6883 out:
6884 mutex_unlock(&iter->mutex);
6885
6886 return sret;
6887 }
6888
6889 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6890 unsigned int idx)
6891 {
6892 __free_page(spd->pages[idx]);
6893 }
6894
6895 static size_t
6896 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6897 {
6898 size_t count;
6899 int save_len;
6900 int ret;
6901
6902 /* Seq buffer is page-sized, exactly what we need. */
6903 for (;;) {
6904 save_len = iter->seq.seq.len;
6905 ret = print_trace_line(iter);
6906
6907 if (trace_seq_has_overflowed(&iter->seq)) {
6908 iter->seq.seq.len = save_len;
6909 break;
6910 }
6911
6912 /*
6913 * This should not be hit, because it should only
6914 * be set if the iter->seq overflowed. But check it
6915 * anyway to be safe.
6916 */
6917 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6918 iter->seq.seq.len = save_len;
6919 break;
6920 }
6921
6922 count = trace_seq_used(&iter->seq) - save_len;
6923 if (rem < count) {
6924 rem = 0;
6925 iter->seq.seq.len = save_len;
6926 break;
6927 }
6928
6929 if (ret != TRACE_TYPE_NO_CONSUME)
6930 trace_consume(iter);
6931 rem -= count;
6932 if (!trace_find_next_entry_inc(iter)) {
6933 rem = 0;
6934 iter->ent = NULL;
6935 break;
6936 }
6937 }
6938
6939 return rem;
6940 }
6941
6942 static ssize_t tracing_splice_read_pipe(struct file *filp,
6943 loff_t *ppos,
6944 struct pipe_inode_info *pipe,
6945 size_t len,
6946 unsigned int flags)
6947 {
6948 struct page *pages_def[PIPE_DEF_BUFFERS];
6949 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6950 struct trace_iterator *iter = filp->private_data;
6951 struct splice_pipe_desc spd = {
6952 .pages = pages_def,
6953 .partial = partial_def,
6954 .nr_pages = 0, /* This gets updated below. */
6955 .nr_pages_max = PIPE_DEF_BUFFERS,
6956 .ops = &default_pipe_buf_ops,
6957 .spd_release = tracing_spd_release_pipe,
6958 };
6959 ssize_t ret;
6960 size_t rem;
6961 unsigned int i;
6962
6963 if (splice_grow_spd(pipe, &spd))
6964 return -ENOMEM;
6965
6966 mutex_lock(&iter->mutex);
6967
6968 if (iter->trace->splice_read) {
6969 ret = iter->trace->splice_read(iter, filp,
6970 ppos, pipe, len, flags);
6971 if (ret)
6972 goto out_err;
6973 }
6974
6975 ret = tracing_wait_pipe(filp);
6976 if (ret <= 0)
6977 goto out_err;
6978
6979 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6980 ret = -EFAULT;
6981 goto out_err;
6982 }
6983
6984 trace_event_read_lock();
6985 trace_access_lock(iter->cpu_file);
6986
6987 /* Fill as many pages as possible. */
6988 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6989 spd.pages[i] = alloc_page(GFP_KERNEL);
6990 if (!spd.pages[i])
6991 break;
6992
6993 rem = tracing_fill_pipe_page(rem, iter);
6994
6995 /* Copy the data into the page, so we can start over. */
6996 ret = trace_seq_to_buffer(&iter->seq,
6997 page_address(spd.pages[i]),
6998 trace_seq_used(&iter->seq));
6999 if (ret < 0) {
7000 __free_page(spd.pages[i]);
7001 break;
7002 }
7003 spd.partial[i].offset = 0;
7004 spd.partial[i].len = trace_seq_used(&iter->seq);
7005
7006 trace_seq_init(&iter->seq);
7007 }
7008
7009 trace_access_unlock(iter->cpu_file);
7010 trace_event_read_unlock();
7011 mutex_unlock(&iter->mutex);
7012
7013 spd.nr_pages = i;
7014
7015 if (i)
7016 ret = splice_to_pipe(pipe, &spd);
7017 else
7018 ret = 0;
7019 out:
7020 splice_shrink_spd(&spd);
7021 return ret;
7022
7023 out_err:
7024 mutex_unlock(&iter->mutex);
7025 goto out;
7026 }
7027
7028 static ssize_t
7029 tracing_entries_read(struct file *filp, char __user *ubuf,
7030 size_t cnt, loff_t *ppos)
7031 {
7032 struct inode *inode = file_inode(filp);
7033 struct trace_array *tr = inode->i_private;
7034 int cpu = tracing_get_cpu(inode);
7035 char buf[64];
7036 int r = 0;
7037 ssize_t ret;
7038
7039 mutex_lock(&trace_types_lock);
7040
7041 if (cpu == RING_BUFFER_ALL_CPUS) {
7042 int cpu, buf_size_same;
7043 unsigned long size;
7044
7045 size = 0;
7046 buf_size_same = 1;
7047 /* check if all cpu sizes are same */
7048 for_each_tracing_cpu(cpu) {
7049 /* fill in the size from first enabled cpu */
7050 if (size == 0)
7051 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7052 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7053 buf_size_same = 0;
7054 break;
7055 }
7056 }
7057
7058 if (buf_size_same) {
7059 if (!ring_buffer_expanded)
7060 r = sprintf(buf, "%lu (expanded: %lu)\n",
7061 size >> 10,
7062 trace_buf_size >> 10);
7063 else
7064 r = sprintf(buf, "%lu\n", size >> 10);
7065 } else
7066 r = sprintf(buf, "X\n");
7067 } else
7068 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7069
7070 mutex_unlock(&trace_types_lock);
7071
7072 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7073 return ret;
7074 }
7075
7076 static ssize_t
7077 tracing_entries_write(struct file *filp, const char __user *ubuf,
7078 size_t cnt, loff_t *ppos)
7079 {
7080 struct inode *inode = file_inode(filp);
7081 struct trace_array *tr = inode->i_private;
7082 unsigned long val;
7083 int ret;
7084
7085 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7086 if (ret)
7087 return ret;
7088
7089 /* must have at least 1 entry */
7090 if (!val)
7091 return -EINVAL;
7092
7093 /* value is in KB */
7094 val <<= 10;
7095 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7096 if (ret < 0)
7097 return ret;
7098
7099 *ppos += cnt;
7100
7101 return cnt;
7102 }
7103
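/*
 * Usage note (illustrative): the value written above is in KiB and is
 * shifted left by 10 before the resize, so "echo 4096 > buffer_size_kb"
 * requests a 4 MiB per-CPU buffer.  Writing the per_cpu/cpuN copy of the
 * file resizes only that CPU's buffer.
 */
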
7104 static ssize_t
7105 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7106 size_t cnt, loff_t *ppos)
7107 {
7108 struct trace_array *tr = filp->private_data;
7109 char buf[64];
7110 int r, cpu;
7111 unsigned long size = 0, expanded_size = 0;
7112
7113 mutex_lock(&trace_types_lock);
7114 for_each_tracing_cpu(cpu) {
7115 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7116 if (!ring_buffer_expanded)
7117 expanded_size += trace_buf_size >> 10;
7118 }
7119 if (ring_buffer_expanded)
7120 r = sprintf(buf, "%lu\n", size);
7121 else
7122 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7123 mutex_unlock(&trace_types_lock);
7124
7125 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7126 }
7127
7128 static ssize_t
7129 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7130 size_t cnt, loff_t *ppos)
7131 {
7132 /*
7133 * There is no need to read what the user has written; this function
7134 * exists only so that using "echo" on this file does not produce an error
7135 */
7136
7137 *ppos += cnt;
7138
7139 return cnt;
7140 }
7141
7142 static int
7143 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7144 {
7145 struct trace_array *tr = inode->i_private;
7146
7147 /* disable tracing ? */
7148 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7149 tracer_tracing_off(tr);
7150 /* resize the ring buffer to 0 */
7151 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7152
7153 trace_array_put(tr);
7154
7155 return 0;
7156 }
7157
7158 static ssize_t
7159 tracing_mark_write(struct file *filp, const char __user *ubuf,
7160 size_t cnt, loff_t *fpos)
7161 {
7162 struct trace_array *tr = filp->private_data;
7163 struct ring_buffer_event *event;
7164 enum event_trigger_type tt = ETT_NONE;
7165 struct trace_buffer *buffer;
7166 struct print_entry *entry;
7167 ssize_t written;
7168 int size;
7169 int len;
7170
7171 /* Used in tracing_mark_raw_write() as well */
7172 #define FAULTED_STR "<faulted>"
7173 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7174
7175 if (tracing_disabled)
7176 return -EINVAL;
7177
7178 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7179 return -EINVAL;
7180
7181 if (cnt > TRACE_BUF_SIZE)
7182 cnt = TRACE_BUF_SIZE;
7183
7184 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7185
7186 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7187
7188 /* If less than "<faulted>", then make sure we can still add that */
7189 if (cnt < FAULTED_SIZE)
7190 size += FAULTED_SIZE - cnt;
7191
7192 buffer = tr->array_buffer.buffer;
7193 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7194 tracing_gen_ctx());
7195 if (unlikely(!event))
7196 /* Ring buffer disabled, return as if not open for write */
7197 return -EBADF;
7198
7199 entry = ring_buffer_event_data(event);
7200 entry->ip = _THIS_IP_;
7201
7202 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7203 if (len) {
7204 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7205 cnt = FAULTED_SIZE;
7206 written = -EFAULT;
7207 } else
7208 written = cnt;
7209
7210 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7211 /* do not add \n before testing triggers, but add \0 */
7212 entry->buf[cnt] = '\0';
7213 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7214 }
7215
7216 if (entry->buf[cnt - 1] != '\n') {
7217 entry->buf[cnt] = '\n';
7218 entry->buf[cnt + 1] = '\0';
7219 } else
7220 entry->buf[cnt] = '\0';
7221
7222 if (static_branch_unlikely(&trace_marker_exports_enabled))
7223 ftrace_exports(event, TRACE_EXPORT_MARKER);
7224 __buffer_unlock_commit(buffer, event);
7225
7226 if (tt)
7227 event_triggers_post_call(tr->trace_marker_file, tt);
7228
7229 return written;
7230 }
7231
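/*
 * Illustrative userspace sketch (tracefs path assumed to be the
 * conventional mount point): strings written to trace_marker land in
 * tracing_mark_write() above and appear in the trace as print events,
 * which is handy for correlating application activity with kernel events.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int trace_marker_fd = -1;
 *
 *	static void trace_mark(const char *msg)
 *	{
 *		if (trace_marker_fd < 0)
 *			trace_marker_fd = open("/sys/kernel/tracing/trace_marker",
 *					       O_WRONLY);
 *		if (trace_marker_fd >= 0)
 *			write(trace_marker_fd, msg, strlen(msg));
 *	}
 *
 *	// e.g. trace_mark("frame start");
 */
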
7232 /* Limit it for now to 3K (including tag) */
7233 #define RAW_DATA_MAX_SIZE (1024*3)
7234
7235 static ssize_t
7236 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7237 size_t cnt, loff_t *fpos)
7238 {
7239 struct trace_array *tr = filp->private_data;
7240 struct ring_buffer_event *event;
7241 struct trace_buffer *buffer;
7242 struct raw_data_entry *entry;
7243 ssize_t written;
7244 int size;
7245 int len;
7246
7247 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7248
7249 if (tracing_disabled)
7250 return -EINVAL;
7251
7252 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7253 return -EINVAL;
7254
7255 /* The marker must at least have a tag id */
7256 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7257 return -EINVAL;
7258
7259 if (cnt > TRACE_BUF_SIZE)
7260 cnt = TRACE_BUF_SIZE;
7261
7262 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7263
7264 size = sizeof(*entry) + cnt;
7265 if (cnt < FAULT_SIZE_ID)
7266 size += FAULT_SIZE_ID - cnt;
7267
7268 buffer = tr->array_buffer.buffer;
7269 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7270 tracing_gen_ctx());
7271 if (!event)
7272 /* Ring buffer disabled, return as if not open for write */
7273 return -EBADF;
7274
7275 entry = ring_buffer_event_data(event);
7276
7277 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7278 if (len) {
7279 entry->id = -1;
7280 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7281 written = -EFAULT;
7282 } else
7283 written = cnt;
7284
7285 __buffer_unlock_commit(buffer, event);
7286
7287 return written;
7288 }
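/*
 * Illustrative sketch of feeding the raw marker above: the payload must
 * begin with an unsigned int tag id, optionally followed by opaque data.
 * The tag value, layout and the /sys/kernel/tracing mount point below are
 * assumptions.
 *
 *	struct { unsigned int id; char data[8]; } rec = { 42, "payload" };
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *	write(fd, &rec, sizeof(rec));
 *	close(fd);
 */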
7289
7290 static int tracing_clock_show(struct seq_file *m, void *v)
7291 {
7292 struct trace_array *tr = m->private;
7293 int i;
7294
7295 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7296 seq_printf(m,
7297 "%s%s%s%s", i ? " " : "",
7298 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7299 i == tr->clock_id ? "]" : "");
7300 seq_putc(m, '\n');
7301
7302 return 0;
7303 }
7304
7305 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7306 {
7307 int i;
7308
7309 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7310 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7311 break;
7312 }
7313 if (i == ARRAY_SIZE(trace_clocks))
7314 return -EINVAL;
7315
7316 mutex_lock(&trace_types_lock);
7317
7318 tr->clock_id = i;
7319
7320 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7321
7322 /*
7323 * New clock may not be consistent with the previous clock.
7324 * Reset the buffer so that it doesn't have incomparable timestamps.
7325 */
7326 tracing_reset_online_cpus(&tr->array_buffer);
7327
7328 #ifdef CONFIG_TRACER_MAX_TRACE
7329 if (tr->max_buffer.buffer)
7330 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7331 tracing_reset_online_cpus(&tr->max_buffer);
7332 #endif
7333
7334 mutex_unlock(&trace_types_lock);
7335
7336 return 0;
7337 }
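/*
 * Illustrative usage of the clock selection implemented above, assuming
 * tracefs is mounted at /sys/kernel/tracing (the exact list of clocks
 * printed depends on the kernel configuration):
 *
 *	# cat trace_clock
 *	[local] global counter ...
 *	# echo global > trace_clock
 */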
7338
7339 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7340 size_t cnt, loff_t *fpos)
7341 {
7342 struct seq_file *m = filp->private_data;
7343 struct trace_array *tr = m->private;
7344 char buf[64];
7345 const char *clockstr;
7346 int ret;
7347
7348 if (cnt >= sizeof(buf))
7349 return -EINVAL;
7350
7351 if (copy_from_user(buf, ubuf, cnt))
7352 return -EFAULT;
7353
7354 buf[cnt] = 0;
7355
7356 clockstr = strstrip(buf);
7357
7358 ret = tracing_set_clock(tr, clockstr);
7359 if (ret)
7360 return ret;
7361
7362 *fpos += cnt;
7363
7364 return cnt;
7365 }
7366
7367 static int tracing_clock_open(struct inode *inode, struct file *file)
7368 {
7369 struct trace_array *tr = inode->i_private;
7370 int ret;
7371
7372 ret = tracing_check_open_get_tr(tr);
7373 if (ret)
7374 return ret;
7375
7376 ret = single_open(file, tracing_clock_show, inode->i_private);
7377 if (ret < 0)
7378 trace_array_put(tr);
7379
7380 return ret;
7381 }
7382
7383 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7384 {
7385 struct trace_array *tr = m->private;
7386
7387 mutex_lock(&trace_types_lock);
7388
7389 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7390 seq_puts(m, "delta [absolute]\n");
7391 else
7392 seq_puts(m, "[delta] absolute\n");
7393
7394 mutex_unlock(&trace_types_lock);
7395
7396 return 0;
7397 }
7398
7399 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7400 {
7401 struct trace_array *tr = inode->i_private;
7402 int ret;
7403
7404 ret = tracing_check_open_get_tr(tr);
7405 if (ret)
7406 return ret;
7407
7408 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7409 if (ret < 0)
7410 trace_array_put(tr);
7411
7412 return ret;
7413 }
7414
7415 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7416 {
7417 if (rbe == this_cpu_read(trace_buffered_event))
7418 return ring_buffer_time_stamp(buffer);
7419
7420 return ring_buffer_event_time_stamp(buffer, rbe);
7421 }
7422
7423 /*
7424 * Set or disable using the per CPU trace_buffered_event when possible.
7425 */
7426 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7427 {
7428 int ret = 0;
7429
7430 mutex_lock(&trace_types_lock);
7431
7432 if (set && tr->no_filter_buffering_ref++)
7433 goto out;
7434
7435 if (!set) {
7436 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7437 ret = -EINVAL;
7438 goto out;
7439 }
7440
7441 --tr->no_filter_buffering_ref;
7442 }
7443 out:
7444 mutex_unlock(&trace_types_lock);
7445
7446 return ret;
7447 }
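/*
 * Sketch of the intended call pairing for the refcount above (the
 * surrounding context is illustrative): a user that needs events to
 * bypass the buffered path takes a reference and drops it when done, so
 * buffering is only used again once the last such user is gone.
 *
 *	tracing_set_filter_buffering(tr, true);	 // ++no_filter_buffering_ref
 *	...					 // e.g. while a trigger needs
 *						 // unbuffered timestamps
 *	tracing_set_filter_buffering(tr, false); // --no_filter_buffering_ref
 */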
7448
7449 struct ftrace_buffer_info {
7450 struct trace_iterator iter;
7451 void *spare;
7452 unsigned int spare_cpu;
7453 unsigned int read;
7454 };
7455
7456 #ifdef CONFIG_TRACER_SNAPSHOT
7457 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7458 {
7459 struct trace_array *tr = inode->i_private;
7460 struct trace_iterator *iter;
7461 struct seq_file *m;
7462 int ret;
7463
7464 ret = tracing_check_open_get_tr(tr);
7465 if (ret)
7466 return ret;
7467
7468 if (file->f_mode & FMODE_READ) {
7469 iter = __tracing_open(inode, file, true);
7470 if (IS_ERR(iter))
7471 ret = PTR_ERR(iter);
7472 } else {
7473 /* Writes still need the seq_file to hold the private data */
7474 ret = -ENOMEM;
7475 m = kzalloc(sizeof(*m), GFP_KERNEL);
7476 if (!m)
7477 goto out;
7478 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7479 if (!iter) {
7480 kfree(m);
7481 goto out;
7482 }
7483 ret = 0;
7484
7485 iter->tr = tr;
7486 iter->array_buffer = &tr->max_buffer;
7487 iter->cpu_file = tracing_get_cpu(inode);
7488 m->private = iter;
7489 file->private_data = m;
7490 }
7491 out:
7492 if (ret < 0)
7493 trace_array_put(tr);
7494
7495 return ret;
7496 }
7497
7498 static void tracing_swap_cpu_buffer(void *tr)
7499 {
7500 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7501 }
7502
7503 static ssize_t
7504 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7505 loff_t *ppos)
7506 {
7507 struct seq_file *m = filp->private_data;
7508 struct trace_iterator *iter = m->private;
7509 struct trace_array *tr = iter->tr;
7510 unsigned long val;
7511 int ret;
7512
7513 ret = tracing_update_buffers();
7514 if (ret < 0)
7515 return ret;
7516
7517 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7518 if (ret)
7519 return ret;
7520
7521 mutex_lock(&trace_types_lock);
7522
7523 if (tr->current_trace->use_max_tr) {
7524 ret = -EBUSY;
7525 goto out;
7526 }
7527
7528 local_irq_disable();
7529 arch_spin_lock(&tr->max_lock);
7530 if (tr->cond_snapshot)
7531 ret = -EBUSY;
7532 arch_spin_unlock(&tr->max_lock);
7533 local_irq_enable();
7534 if (ret)
7535 goto out;
7536
7537 switch (val) {
7538 case 0:
7539 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7540 ret = -EINVAL;
7541 break;
7542 }
7543 if (tr->allocated_snapshot)
7544 free_snapshot(tr);
7545 break;
7546 case 1:
7547 /* Only allow per-cpu swap if the ring buffer supports it */
7548 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7549 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7550 ret = -EINVAL;
7551 break;
7552 }
7553 #endif
7554 if (tr->allocated_snapshot)
7555 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7556 &tr->array_buffer, iter->cpu_file);
7557 else
7558 ret = tracing_alloc_snapshot_instance(tr);
7559 if (ret < 0)
7560 break;
7561 /* Now, we're going to swap */
7562 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7563 local_irq_disable();
7564 update_max_tr(tr, current, smp_processor_id(), NULL);
7565 local_irq_enable();
7566 } else {
7567 smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7568 (void *)tr, 1);
7569 }
7570 break;
7571 default:
7572 if (tr->allocated_snapshot) {
7573 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7574 tracing_reset_online_cpus(&tr->max_buffer);
7575 else
7576 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7577 }
7578 break;
7579 }
7580
7581 if (ret >= 0) {
7582 *ppos += cnt;
7583 ret = cnt;
7584 }
7585 out:
7586 mutex_unlock(&trace_types_lock);
7587 return ret;
7588 }
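/*
 * Illustrative mapping of the values handled above onto the usual
 * snapshot file interface (tracefs assumed at /sys/kernel/tracing):
 *
 *	echo 1 > snapshot	# allocate if needed, then swap in a snapshot
 *	echo 0 > snapshot	# free the snapshot buffer
 *	echo 2 > snapshot	# any value > 1: clear the snapshot contents
 */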
7589
7590 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7591 {
7592 struct seq_file *m = file->private_data;
7593 int ret;
7594
7595 ret = tracing_release(inode, file);
7596
7597 if (file->f_mode & FMODE_READ)
7598 return ret;
7599
7600 /* If write only, the seq_file is just a stub */
7601 if (m)
7602 kfree(m->private);
7603 kfree(m);
7604
7605 return 0;
7606 }
7607
7608 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7609 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7610 size_t count, loff_t *ppos);
7611 static int tracing_buffers_release(struct inode *inode, struct file *file);
7612 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7613 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7614
7615 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7616 {
7617 struct ftrace_buffer_info *info;
7618 int ret;
7619
7620 /* The following checks for tracefs lockdown */
7621 ret = tracing_buffers_open(inode, filp);
7622 if (ret < 0)
7623 return ret;
7624
7625 info = filp->private_data;
7626
7627 if (info->iter.trace->use_max_tr) {
7628 tracing_buffers_release(inode, filp);
7629 return -EBUSY;
7630 }
7631
7632 info->iter.snapshot = true;
7633 info->iter.array_buffer = &info->iter.tr->max_buffer;
7634
7635 return ret;
7636 }
7637
7638 #endif /* CONFIG_TRACER_SNAPSHOT */
7639
7640
7641 static const struct file_operations tracing_thresh_fops = {
7642 .open = tracing_open_generic,
7643 .read = tracing_thresh_read,
7644 .write = tracing_thresh_write,
7645 .llseek = generic_file_llseek,
7646 };
7647
7648 #ifdef CONFIG_TRACER_MAX_TRACE
7649 static const struct file_operations tracing_max_lat_fops = {
7650 .open = tracing_open_generic_tr,
7651 .read = tracing_max_lat_read,
7652 .write = tracing_max_lat_write,
7653 .llseek = generic_file_llseek,
7654 .release = tracing_release_generic_tr,
7655 };
7656 #endif
7657
7658 static const struct file_operations set_tracer_fops = {
7659 .open = tracing_open_generic_tr,
7660 .read = tracing_set_trace_read,
7661 .write = tracing_set_trace_write,
7662 .llseek = generic_file_llseek,
7663 .release = tracing_release_generic_tr,
7664 };
7665
7666 static const struct file_operations tracing_pipe_fops = {
7667 .open = tracing_open_pipe,
7668 .poll = tracing_poll_pipe,
7669 .read = tracing_read_pipe,
7670 .splice_read = tracing_splice_read_pipe,
7671 .release = tracing_release_pipe,
7672 .llseek = no_llseek,
7673 };
7674
7675 static const struct file_operations tracing_entries_fops = {
7676 .open = tracing_open_generic_tr,
7677 .read = tracing_entries_read,
7678 .write = tracing_entries_write,
7679 .llseek = generic_file_llseek,
7680 .release = tracing_release_generic_tr,
7681 };
7682
7683 static const struct file_operations tracing_total_entries_fops = {
7684 .open = tracing_open_generic_tr,
7685 .read = tracing_total_entries_read,
7686 .llseek = generic_file_llseek,
7687 .release = tracing_release_generic_tr,
7688 };
7689
7690 static const struct file_operations tracing_free_buffer_fops = {
7691 .open = tracing_open_generic_tr,
7692 .write = tracing_free_buffer_write,
7693 .release = tracing_free_buffer_release,
7694 };
7695
7696 static const struct file_operations tracing_mark_fops = {
7697 .open = tracing_mark_open,
7698 .write = tracing_mark_write,
7699 .release = tracing_release_generic_tr,
7700 };
7701
7702 static const struct file_operations tracing_mark_raw_fops = {
7703 .open = tracing_mark_open,
7704 .write = tracing_mark_raw_write,
7705 .release = tracing_release_generic_tr,
7706 };
7707
7708 static const struct file_operations trace_clock_fops = {
7709 .open = tracing_clock_open,
7710 .read = seq_read,
7711 .llseek = seq_lseek,
7712 .release = tracing_single_release_tr,
7713 .write = tracing_clock_write,
7714 };
7715
7716 static const struct file_operations trace_time_stamp_mode_fops = {
7717 .open = tracing_time_stamp_mode_open,
7718 .read = seq_read,
7719 .llseek = seq_lseek,
7720 .release = tracing_single_release_tr,
7721 };
7722
7723 #ifdef CONFIG_TRACER_SNAPSHOT
7724 static const struct file_operations snapshot_fops = {
7725 .open = tracing_snapshot_open,
7726 .read = seq_read,
7727 .write = tracing_snapshot_write,
7728 .llseek = tracing_lseek,
7729 .release = tracing_snapshot_release,
7730 };
7731
7732 static const struct file_operations snapshot_raw_fops = {
7733 .open = snapshot_raw_open,
7734 .read = tracing_buffers_read,
7735 .release = tracing_buffers_release,
7736 .splice_read = tracing_buffers_splice_read,
7737 .llseek = no_llseek,
7738 };
7739
7740 #endif /* CONFIG_TRACER_SNAPSHOT */
7741
7742 /*
7743 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7744 * @filp: The active open file structure
7745 * @ubuf: The user-space buffer holding the value to be written
7746 * @cnt: The number of bytes to read from @ubuf
7747 * @ppos: The current "file" position
7748 *
7749 * This function implements the write interface for a struct trace_min_max_param.
7750 * The filp->private_data must point to a trace_min_max_param structure that
7751 * defines where to write the value, the min and the max acceptable values,
7752 * and a lock to protect the write.
7753 */
7754 static ssize_t
7755 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7756 {
7757 struct trace_min_max_param *param = filp->private_data;
7758 u64 val;
7759 int err;
7760
7761 if (!param)
7762 return -EFAULT;
7763
7764 err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7765 if (err)
7766 return err;
7767
7768 if (param->lock)
7769 mutex_lock(param->lock);
7770
7771 if (param->min && val < *param->min)
7772 err = -EINVAL;
7773
7774 if (param->max && val > *param->max)
7775 err = -EINVAL;
7776
7777 if (!err)
7778 *param->val = val;
7779
7780 if (param->lock)
7781 mutex_unlock(param->lock);
7782
7783 if (err)
7784 return err;
7785
7786 return cnt;
7787 }
7788
7789 /*
7790 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7791 * @filp: The active open file structure
7792 * @ubuf: The userspace provided buffer to read value into
7793 * @cnt: The maximum number of bytes to read
7794 * @ppos: The current "file" position
7795 *
7796 * This function implements the read interface for a struct trace_min_max_param.
7797 * The filp->private_data must point to a trace_min_max_param struct with valid
7798 * data.
7799 */
7800 static ssize_t
7801 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7802 {
7803 struct trace_min_max_param *param = filp->private_data;
7804 char buf[U64_STR_SIZE];
7805 int len;
7806 u64 val;
7807
7808 if (!param)
7809 return -EFAULT;
7810
7811 val = *param->val;
7812
7813 if (cnt > sizeof(buf))
7814 cnt = sizeof(buf);
7815
7816 len = snprintf(buf, sizeof(buf), "%llu\n", val);
7817
7818 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7819 }
7820
7821 const struct file_operations trace_min_max_fops = {
7822 .open = tracing_open_generic,
7823 .read = trace_min_max_read,
7824 .write = trace_min_max_write,
7825 };
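/*
 * Minimal sketch of how a user of trace_min_max_fops is expected to wire
 * things up; every name below (the values, example_mutex and the
 * "example_us" file) is illustrative, not taken from an existing call
 * site.
 *
 *	static u64 example_min = 1, example_max = 1000000, example_val = 100;
 *	static struct trace_min_max_param example_param = {
 *		.lock	= &example_mutex,
 *		.val	= &example_val,
 *		.min	= &example_min,
 *		.max	= &example_max,
 *	};
 *
 *	trace_create_file("example_us", 0640, parent, &example_param,
 *			  &trace_min_max_fops);
 */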
7826
7827 #define TRACING_LOG_ERRS_MAX 8
7828 #define TRACING_LOG_LOC_MAX 128
7829
7830 #define CMD_PREFIX " Command: "
7831
7832 struct err_info {
7833 const char **errs; /* ptr to loc-specific array of err strings */
7834 u8 type; /* index into errs -> specific err string */
7835 u8 pos; /* caret position in cmd; fits in a u8 since MAX_FILTER_STR_VAL = 256 */
7836 u64 ts;
7837 };
7838
7839 struct tracing_log_err {
7840 struct list_head list;
7841 struct err_info info;
7842 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7843 char cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7844 };
7845
7846 static DEFINE_MUTEX(tracing_err_log_lock);
7847
7848 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7849 {
7850 struct tracing_log_err *err;
7851
7852 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7853 err = kzalloc(sizeof(*err), GFP_KERNEL);
7854 if (!err)
7855 err = ERR_PTR(-ENOMEM);
7856 else
7857 tr->n_err_log_entries++;
7858
7859 return err;
7860 }
7861
7862 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7863 list_del(&err->list);
7864
7865 return err;
7866 }
7867
7868 /**
7869 * err_pos - find the position of a string within a command for error careting
7870 * @cmd: The tracing command that caused the error
7871 * @str: The string to position the caret at within @cmd
7872 *
7873 * Finds the position of the first occurrence of @str within @cmd. The
7874 * return value can be passed to tracing_log_err() for caret placement
7875 * within @cmd.
7876 *
7877 * Returns the index within @cmd of the first occurrence of @str or 0
7878 * if @str was not found.
7879 */
7880 unsigned int err_pos(char *cmd, const char *str)
7881 {
7882 char *found;
7883
7884 if (WARN_ON(!strlen(cmd)))
7885 return 0;
7886
7887 found = strstr(cmd, str);
7888 if (found)
7889 return found - cmd;
7890
7891 return 0;
7892 }
7893
7894 /**
7895 * tracing_log_err - write an error to the tracing error log
7896 * @tr: The associated trace array for the error (NULL for top level array)
7897 * @loc: A string describing where the error occurred
7898 * @cmd: The tracing command that caused the error
7899 * @errs: The array of loc-specific static error strings
7900 * @type: The index into errs[], which produces the specific static err string
7901 * @pos: The position the caret should be placed in the cmd
7902 *
7903 * Writes an error into tracing/error_log of the form:
7904 *
7905 * <loc>: error: <text>
7906 * Command: <cmd>
7907 * ^
7908 *
7909 * tracing/error_log is a small log file containing the last
7910 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7911 * unless there has been a tracing error, and the error log can be
7912 * cleared and have its memory freed by writing the empty string in
7913 * truncation mode to it i.e. echo > tracing/error_log.
7914 *
7915 * NOTE: the @errs array along with the @type param are used to
7916 * produce a static error string - this string is not copied and saved
7917 * when the error is logged - only a pointer to it is saved. See
7918 * existing callers for examples of how static strings are typically
7919 * defined for use with tracing_log_err().
7920 */
7921 void tracing_log_err(struct trace_array *tr,
7922 const char *loc, const char *cmd,
7923 const char **errs, u8 type, u8 pos)
7924 {
7925 struct tracing_log_err *err;
7926
7927 if (!tr)
7928 tr = &global_trace;
7929
7930 mutex_lock(&tracing_err_log_lock);
7931 err = get_tracing_log_err(tr);
7932 if (PTR_ERR(err) == -ENOMEM) {
7933 mutex_unlock(&tracing_err_log_lock);
7934 return;
7935 }
7936
7937 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7938 snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7939
7940 err->info.errs = errs;
7941 err->info.type = type;
7942 err->info.pos = pos;
7943 err->info.ts = local_clock();
7944
7945 list_add_tail(&err->list, &tr->err_log);
7946 mutex_unlock(&tracing_err_log_lock);
7947 }
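/*
 * Sketch of a typical caller of tracing_log_err() (the names are
 * illustrative, not an actual call site): errs[] is static because only a
 * pointer to the selected string is stored in the log entry.
 *
 *	static const char *my_errs[] = { "Field not found", "Bad operator" };
 *
 *	tracing_log_err(tr, "hist:my_event", cmd_str, my_errs,
 *			0, err_pos(cmd_str, "bad_field"));
 */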
7948
7949 static void clear_tracing_err_log(struct trace_array *tr)
7950 {
7951 struct tracing_log_err *err, *next;
7952
7953 mutex_lock(&tracing_err_log_lock);
7954 list_for_each_entry_safe(err, next, &tr->err_log, list) {
7955 list_del(&err->list);
7956 kfree(err);
7957 }
7958
7959 tr->n_err_log_entries = 0;
7960 mutex_unlock(&tracing_err_log_lock);
7961 }
7962
7963 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7964 {
7965 struct trace_array *tr = m->private;
7966
7967 mutex_lock(&tracing_err_log_lock);
7968
7969 return seq_list_start(&tr->err_log, *pos);
7970 }
7971
7972 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7973 {
7974 struct trace_array *tr = m->private;
7975
7976 return seq_list_next(v, &tr->err_log, pos);
7977 }
7978
7979 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7980 {
7981 mutex_unlock(&tracing_err_log_lock);
7982 }
7983
7984 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7985 {
7986 u8 i;
7987
7988 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7989 seq_putc(m, ' ');
7990 for (i = 0; i < pos; i++)
7991 seq_putc(m, ' ');
7992 seq_puts(m, "^\n");
7993 }
7994
7995 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7996 {
7997 struct tracing_log_err *err = v;
7998
7999 if (err) {
8000 const char *err_text = err->info.errs[err->info.type];
8001 u64 sec = err->info.ts;
8002 u32 nsec;
8003
8004 nsec = do_div(sec, NSEC_PER_SEC);
8005 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8006 err->loc, err_text);
8007 seq_printf(m, "%s", err->cmd);
8008 tracing_err_log_show_pos(m, err->info.pos);
8009 }
8010
8011 return 0;
8012 }
8013
8014 static const struct seq_operations tracing_err_log_seq_ops = {
8015 .start = tracing_err_log_seq_start,
8016 .next = tracing_err_log_seq_next,
8017 .stop = tracing_err_log_seq_stop,
8018 .show = tracing_err_log_seq_show
8019 };
8020
8021 static int tracing_err_log_open(struct inode *inode, struct file *file)
8022 {
8023 struct trace_array *tr = inode->i_private;
8024 int ret = 0;
8025
8026 ret = tracing_check_open_get_tr(tr);
8027 if (ret)
8028 return ret;
8029
8030 /* If this file was opened for write, then erase contents */
8031 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8032 clear_tracing_err_log(tr);
8033
8034 if (file->f_mode & FMODE_READ) {
8035 ret = seq_open(file, &tracing_err_log_seq_ops);
8036 if (!ret) {
8037 struct seq_file *m = file->private_data;
8038 m->private = tr;
8039 } else {
8040 trace_array_put(tr);
8041 }
8042 }
8043 return ret;
8044 }
8045
8046 static ssize_t tracing_err_log_write(struct file *file,
8047 const char __user *buffer,
8048 size_t count, loff_t *ppos)
8049 {
8050 return count;
8051 }
8052
8053 static int tracing_err_log_release(struct inode *inode, struct file *file)
8054 {
8055 struct trace_array *tr = inode->i_private;
8056
8057 trace_array_put(tr);
8058
8059 if (file->f_mode & FMODE_READ)
8060 seq_release(inode, file);
8061
8062 return 0;
8063 }
8064
8065 static const struct file_operations tracing_err_log_fops = {
8066 .open = tracing_err_log_open,
8067 .write = tracing_err_log_write,
8068 .read = seq_read,
8069 .llseek = tracing_lseek,
8070 .release = tracing_err_log_release,
8071 };
8072
8073 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8074 {
8075 struct trace_array *tr = inode->i_private;
8076 struct ftrace_buffer_info *info;
8077 int ret;
8078
8079 ret = tracing_check_open_get_tr(tr);
8080 if (ret)
8081 return ret;
8082
8083 info = kvzalloc(sizeof(*info), GFP_KERNEL);
8084 if (!info) {
8085 trace_array_put(tr);
8086 return -ENOMEM;
8087 }
8088
8089 mutex_lock(&trace_types_lock);
8090
8091 info->iter.tr = tr;
8092 info->iter.cpu_file = tracing_get_cpu(inode);
8093 info->iter.trace = tr->current_trace;
8094 info->iter.array_buffer = &tr->array_buffer;
8095 info->spare = NULL;
8096 /* Force reading ring buffer for first read */
8097 info->read = (unsigned int)-1;
8098
8099 filp->private_data = info;
8100
8101 tr->trace_ref++;
8102
8103 mutex_unlock(&trace_types_lock);
8104
8105 ret = nonseekable_open(inode, filp);
8106 if (ret < 0)
8107 trace_array_put(tr);
8108
8109 return ret;
8110 }
8111
8112 static __poll_t
8113 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8114 {
8115 struct ftrace_buffer_info *info = filp->private_data;
8116 struct trace_iterator *iter = &info->iter;
8117
8118 return trace_poll(iter, filp, poll_table);
8119 }
8120
8121 static ssize_t
8122 tracing_buffers_read(struct file *filp, char __user *ubuf,
8123 size_t count, loff_t *ppos)
8124 {
8125 struct ftrace_buffer_info *info = filp->private_data;
8126 struct trace_iterator *iter = &info->iter;
8127 ssize_t ret = 0;
8128 ssize_t size;
8129
8130 if (!count)
8131 return 0;
8132
8133 #ifdef CONFIG_TRACER_MAX_TRACE
8134 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8135 return -EBUSY;
8136 #endif
8137
8138 if (!info->spare) {
8139 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8140 iter->cpu_file);
8141 if (IS_ERR(info->spare)) {
8142 ret = PTR_ERR(info->spare);
8143 info->spare = NULL;
8144 } else {
8145 info->spare_cpu = iter->cpu_file;
8146 }
8147 }
8148 if (!info->spare)
8149 return ret;
8150
8151 /* Do we have previous read data to read? */
8152 if (info->read < PAGE_SIZE)
8153 goto read;
8154
8155 again:
8156 trace_access_lock(iter->cpu_file);
8157 ret = ring_buffer_read_page(iter->array_buffer->buffer,
8158 &info->spare,
8159 count,
8160 iter->cpu_file, 0);
8161 trace_access_unlock(iter->cpu_file);
8162
8163 if (ret < 0) {
8164 if (trace_empty(iter)) {
8165 if ((filp->f_flags & O_NONBLOCK))
8166 return -EAGAIN;
8167
8168 ret = wait_on_pipe(iter, 0);
8169 if (ret)
8170 return ret;
8171
8172 goto again;
8173 }
8174 return 0;
8175 }
8176
8177 info->read = 0;
8178 read:
8179 size = PAGE_SIZE - info->read;
8180 if (size > count)
8181 size = count;
8182
8183 ret = copy_to_user(ubuf, info->spare + info->read, size);
8184 if (ret == size)
8185 return -EFAULT;
8186
8187 size -= ret;
8188
8189 *ppos += size;
8190 info->read += size;
8191
8192 return size;
8193 }
8194
8195 static int tracing_buffers_release(struct inode *inode, struct file *file)
8196 {
8197 struct ftrace_buffer_info *info = file->private_data;
8198 struct trace_iterator *iter = &info->iter;
8199
8200 mutex_lock(&trace_types_lock);
8201
8202 iter->tr->trace_ref--;
8203
8204 __trace_array_put(iter->tr);
8205
8206 if (info->spare)
8207 ring_buffer_free_read_page(iter->array_buffer->buffer,
8208 info->spare_cpu, info->spare);
8209 kvfree(info);
8210
8211 mutex_unlock(&trace_types_lock);
8212
8213 return 0;
8214 }
8215
8216 struct buffer_ref {
8217 struct trace_buffer *buffer;
8218 void *page;
8219 int cpu;
8220 refcount_t refcount;
8221 };
8222
8223 static void buffer_ref_release(struct buffer_ref *ref)
8224 {
8225 if (!refcount_dec_and_test(&ref->refcount))
8226 return;
8227 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8228 kfree(ref);
8229 }
8230
8231 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8232 struct pipe_buffer *buf)
8233 {
8234 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8235
8236 buffer_ref_release(ref);
8237 buf->private = 0;
8238 }
8239
8240 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8241 struct pipe_buffer *buf)
8242 {
8243 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8244
8245 if (refcount_read(&ref->refcount) > INT_MAX/2)
8246 return false;
8247
8248 refcount_inc(&ref->refcount);
8249 return true;
8250 }
8251
8252 /* Pipe buffer operations for a buffer. */
8253 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8254 .release = buffer_pipe_buf_release,
8255 .get = buffer_pipe_buf_get,
8256 };
8257
8258 /*
8259 * Callback from splice_to_pipe(), if we need to release some pages
8260 * at the end of the spd in case we errored out while filling the pipe.
8261 */
8262 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8263 {
8264 struct buffer_ref *ref =
8265 (struct buffer_ref *)spd->partial[i].private;
8266
8267 buffer_ref_release(ref);
8268 spd->partial[i].private = 0;
8269 }
8270
8271 static ssize_t
8272 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8273 struct pipe_inode_info *pipe, size_t len,
8274 unsigned int flags)
8275 {
8276 struct ftrace_buffer_info *info = file->private_data;
8277 struct trace_iterator *iter = &info->iter;
8278 struct partial_page partial_def[PIPE_DEF_BUFFERS];
8279 struct page *pages_def[PIPE_DEF_BUFFERS];
8280 struct splice_pipe_desc spd = {
8281 .pages = pages_def,
8282 .partial = partial_def,
8283 .nr_pages_max = PIPE_DEF_BUFFERS,
8284 .ops = &buffer_pipe_buf_ops,
8285 .spd_release = buffer_spd_release,
8286 };
8287 struct buffer_ref *ref;
8288 int entries, i;
8289 ssize_t ret = 0;
8290
8291 #ifdef CONFIG_TRACER_MAX_TRACE
8292 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8293 return -EBUSY;
8294 #endif
8295
8296 if (*ppos & (PAGE_SIZE - 1))
8297 return -EINVAL;
8298
8299 if (len & (PAGE_SIZE - 1)) {
8300 if (len < PAGE_SIZE)
8301 return -EINVAL;
8302 len &= PAGE_MASK;
8303 }
8304
8305 if (splice_grow_spd(pipe, &spd))
8306 return -ENOMEM;
8307
8308 again:
8309 trace_access_lock(iter->cpu_file);
8310 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8311
8312 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8313 struct page *page;
8314 int r;
8315
8316 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8317 if (!ref) {
8318 ret = -ENOMEM;
8319 break;
8320 }
8321
8322 refcount_set(&ref->refcount, 1);
8323 ref->buffer = iter->array_buffer->buffer;
8324 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8325 if (IS_ERR(ref->page)) {
8326 ret = PTR_ERR(ref->page);
8327 ref->page = NULL;
8328 kfree(ref);
8329 break;
8330 }
8331 ref->cpu = iter->cpu_file;
8332
8333 r = ring_buffer_read_page(ref->buffer, &ref->page,
8334 len, iter->cpu_file, 1);
8335 if (r < 0) {
8336 ring_buffer_free_read_page(ref->buffer, ref->cpu,
8337 ref->page);
8338 kfree(ref);
8339 break;
8340 }
8341
8342 page = virt_to_page(ref->page);
8343
8344 spd.pages[i] = page;
8345 spd.partial[i].len = PAGE_SIZE;
8346 spd.partial[i].offset = 0;
8347 spd.partial[i].private = (unsigned long)ref;
8348 spd.nr_pages++;
8349 *ppos += PAGE_SIZE;
8350
8351 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8352 }
8353
8354 trace_access_unlock(iter->cpu_file);
8355 spd.nr_pages = i;
8356
8357 /* did we read anything? */
8358 if (!spd.nr_pages) {
8359 if (ret)
8360 goto out;
8361
8362 ret = -EAGAIN;
8363 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8364 goto out;
8365
8366 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8367 if (ret)
8368 goto out;
8369
8370 /* No need to wait after waking up when tracing is off */
8371 if (!tracer_tracing_is_on(iter->tr))
8372 goto out;
8373
8374 goto again;
8375 }
8376
8377 ret = splice_to_pipe(pipe, &spd);
8378 out:
8379 splice_shrink_spd(&spd);
8380
8381 return ret;
8382 }
8383
8384 static const struct file_operations tracing_buffers_fops = {
8385 .open = tracing_buffers_open,
8386 .read = tracing_buffers_read,
8387 .poll = tracing_buffers_poll,
8388 .release = tracing_buffers_release,
8389 .splice_read = tracing_buffers_splice_read,
8390 .llseek = no_llseek,
8391 };
8392
8393 static ssize_t
8394 tracing_stats_read(struct file *filp, char __user *ubuf,
8395 size_t count, loff_t *ppos)
8396 {
8397 struct inode *inode = file_inode(filp);
8398 struct trace_array *tr = inode->i_private;
8399 struct array_buffer *trace_buf = &tr->array_buffer;
8400 int cpu = tracing_get_cpu(inode);
8401 struct trace_seq *s;
8402 unsigned long cnt;
8403 unsigned long long t;
8404 unsigned long usec_rem;
8405
8406 s = kmalloc(sizeof(*s), GFP_KERNEL);
8407 if (!s)
8408 return -ENOMEM;
8409
8410 trace_seq_init(s);
8411
8412 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8413 trace_seq_printf(s, "entries: %ld\n", cnt);
8414
8415 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8416 trace_seq_printf(s, "overrun: %ld\n", cnt);
8417
8418 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8419 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8420
8421 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8422 trace_seq_printf(s, "bytes: %ld\n", cnt);
8423
8424 if (trace_clocks[tr->clock_id].in_ns) {
8425 /* local or global for trace_clock */
8426 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8427 usec_rem = do_div(t, USEC_PER_SEC);
8428 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8429 t, usec_rem);
8430
8431 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8432 usec_rem = do_div(t, USEC_PER_SEC);
8433 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8434 } else {
8435 /* counter or tsc mode for trace_clock */
8436 trace_seq_printf(s, "oldest event ts: %llu\n",
8437 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8438
8439 trace_seq_printf(s, "now ts: %llu\n",
8440 ring_buffer_time_stamp(trace_buf->buffer));
8441 }
8442
8443 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8444 trace_seq_printf(s, "dropped events: %ld\n", cnt);
8445
8446 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8447 trace_seq_printf(s, "read events: %ld\n", cnt);
8448
8449 count = simple_read_from_buffer(ubuf, count, ppos,
8450 s->buffer, trace_seq_used(s));
8451
8452 kfree(s);
8453
8454 return count;
8455 }
8456
8457 static const struct file_operations tracing_stats_fops = {
8458 .open = tracing_open_generic_tr,
8459 .read = tracing_stats_read,
8460 .llseek = generic_file_llseek,
8461 .release = tracing_release_generic_tr,
8462 };
8463
8464 #ifdef CONFIG_DYNAMIC_FTRACE
8465
8466 static ssize_t
8467 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8468 size_t cnt, loff_t *ppos)
8469 {
8470 ssize_t ret;
8471 char *buf;
8472 int r;
8473
8474 /* 256 should be plenty to hold the amount needed */
8475 buf = kmalloc(256, GFP_KERNEL);
8476 if (!buf)
8477 return -ENOMEM;
8478
8479 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8480 ftrace_update_tot_cnt,
8481 ftrace_number_of_pages,
8482 ftrace_number_of_groups);
8483
8484 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8485 kfree(buf);
8486 return ret;
8487 }
8488
8489 static const struct file_operations tracing_dyn_info_fops = {
8490 .open = tracing_open_generic,
8491 .read = tracing_read_dyn_info,
8492 .llseek = generic_file_llseek,
8493 };
8494 #endif /* CONFIG_DYNAMIC_FTRACE */
8495
8496 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8497 static void
8498 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8499 struct trace_array *tr, struct ftrace_probe_ops *ops,
8500 void *data)
8501 {
8502 tracing_snapshot_instance(tr);
8503 }
8504
8505 static void
8506 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8507 struct trace_array *tr, struct ftrace_probe_ops *ops,
8508 void *data)
8509 {
8510 struct ftrace_func_mapper *mapper = data;
8511 long *count = NULL;
8512
8513 if (mapper)
8514 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8515
8516 if (count) {
8517
8518 if (*count <= 0)
8519 return;
8520
8521 (*count)--;
8522 }
8523
8524 tracing_snapshot_instance(tr);
8525 }
8526
8527 static int
8528 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8529 struct ftrace_probe_ops *ops, void *data)
8530 {
8531 struct ftrace_func_mapper *mapper = data;
8532 long *count = NULL;
8533
8534 seq_printf(m, "%ps:", (void *)ip);
8535
8536 seq_puts(m, "snapshot");
8537
8538 if (mapper)
8539 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8540
8541 if (count)
8542 seq_printf(m, ":count=%ld\n", *count);
8543 else
8544 seq_puts(m, ":unlimited\n");
8545
8546 return 0;
8547 }
8548
8549 static int
8550 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8551 unsigned long ip, void *init_data, void **data)
8552 {
8553 struct ftrace_func_mapper *mapper = *data;
8554
8555 if (!mapper) {
8556 mapper = allocate_ftrace_func_mapper();
8557 if (!mapper)
8558 return -ENOMEM;
8559 *data = mapper;
8560 }
8561
8562 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8563 }
8564
8565 static void
8566 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8567 unsigned long ip, void *data)
8568 {
8569 struct ftrace_func_mapper *mapper = data;
8570
8571 if (!ip) {
8572 if (!mapper)
8573 return;
8574 free_ftrace_func_mapper(mapper, NULL);
8575 return;
8576 }
8577
8578 ftrace_func_mapper_remove_ip(mapper, ip);
8579 }
8580
8581 static struct ftrace_probe_ops snapshot_probe_ops = {
8582 .func = ftrace_snapshot,
8583 .print = ftrace_snapshot_print,
8584 };
8585
8586 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8587 .func = ftrace_count_snapshot,
8588 .print = ftrace_snapshot_print,
8589 .init = ftrace_snapshot_init,
8590 .free = ftrace_snapshot_free,
8591 };
8592
8593 static int
8594 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8595 char *glob, char *cmd, char *param, int enable)
8596 {
8597 struct ftrace_probe_ops *ops;
8598 void *count = (void *)-1;
8599 char *number;
8600 int ret;
8601
8602 if (!tr)
8603 return -ENODEV;
8604
8605 /* hash funcs only work with set_ftrace_filter */
8606 if (!enable)
8607 return -EINVAL;
8608
8609 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8610
8611 if (glob[0] == '!')
8612 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8613
8614 if (!param)
8615 goto out_reg;
8616
8617 number = strsep(&param, ":");
8618
8619 if (!strlen(number))
8620 goto out_reg;
8621
8622 /*
8623 * We use the callback data field (which is a pointer)
8624 * as our counter.
8625 */
8626 ret = kstrtoul(number, 0, (unsigned long *)&count);
8627 if (ret)
8628 return ret;
8629
8630 out_reg:
8631 ret = tracing_alloc_snapshot_instance(tr);
8632 if (ret < 0)
8633 goto out;
8634
8635 ret = register_ftrace_function_probe(glob, tr, ops, count);
8636
8637 out:
8638 return ret < 0 ? ret : 0;
8639 }
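/*
 * Illustrative usage of the "snapshot" function command handled above,
 * assuming tracefs is mounted at /sys/kernel/tracing; the optional :count
 * limits how many snapshots the probe may take.
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter
 *	echo 'vfs_read:snapshot:3' > set_ftrace_filter
 *	echo '!schedule:snapshot' > set_ftrace_filter	# remove the probe
 */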
8640
8641 static struct ftrace_func_command ftrace_snapshot_cmd = {
8642 .name = "snapshot",
8643 .func = ftrace_trace_snapshot_callback,
8644 };
8645
8646 static __init int register_snapshot_cmd(void)
8647 {
8648 return register_ftrace_command(&ftrace_snapshot_cmd);
8649 }
8650 #else
8651 static inline __init int register_snapshot_cmd(void) { return 0; }
8652 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8653
8654 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8655 {
8656 if (WARN_ON(!tr->dir))
8657 return ERR_PTR(-ENODEV);
8658
8659 /* Top directory uses NULL as the parent */
8660 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8661 return NULL;
8662
8663 /* All sub buffers have a descriptor */
8664 return tr->dir;
8665 }
8666
8667 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8668 {
8669 struct dentry *d_tracer;
8670
8671 if (tr->percpu_dir)
8672 return tr->percpu_dir;
8673
8674 d_tracer = tracing_get_dentry(tr);
8675 if (IS_ERR(d_tracer))
8676 return NULL;
8677
8678 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8679
8680 MEM_FAIL(!tr->percpu_dir,
8681 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8682
8683 return tr->percpu_dir;
8684 }
8685
8686 static struct dentry *
8687 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8688 void *data, long cpu, const struct file_operations *fops)
8689 {
8690 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8691
8692 if (ret) /* See tracing_get_cpu() */
8693 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8694 return ret;
8695 }
8696
8697 static void
8698 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8699 {
8700 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8701 struct dentry *d_cpu;
8702 char cpu_dir[30]; /* 30 characters should be more than enough */
8703
8704 if (!d_percpu)
8705 return;
8706
8707 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8708 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8709 if (!d_cpu) {
8710 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8711 return;
8712 }
8713
8714 /* per cpu trace_pipe */
8715 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8716 tr, cpu, &tracing_pipe_fops);
8717
8718 /* per cpu trace */
8719 trace_create_cpu_file("trace", 0644, d_cpu,
8720 tr, cpu, &tracing_fops);
8721
8722 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8723 tr, cpu, &tracing_buffers_fops);
8724
8725 trace_create_cpu_file("stats", 0444, d_cpu,
8726 tr, cpu, &tracing_stats_fops);
8727
8728 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8729 tr, cpu, &tracing_entries_fops);
8730
8731 #ifdef CONFIG_TRACER_SNAPSHOT
8732 trace_create_cpu_file("snapshot", 0644, d_cpu,
8733 tr, cpu, &snapshot_fops);
8734
8735 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8736 tr, cpu, &snapshot_raw_fops);
8737 #endif
8738 }
8739
8740 #ifdef CONFIG_FTRACE_SELFTEST
8741 /* Let selftest have access to static functions in this file */
8742 #include "trace_selftest.c"
8743 #endif
8744
8745 static ssize_t
8746 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8747 loff_t *ppos)
8748 {
8749 struct trace_option_dentry *topt = filp->private_data;
8750 char *buf;
8751
8752 if (topt->flags->val & topt->opt->bit)
8753 buf = "1\n";
8754 else
8755 buf = "0\n";
8756
8757 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8758 }
8759
8760 static ssize_t
8761 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8762 loff_t *ppos)
8763 {
8764 struct trace_option_dentry *topt = filp->private_data;
8765 unsigned long val;
8766 int ret;
8767
8768 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8769 if (ret)
8770 return ret;
8771
8772 if (val != 0 && val != 1)
8773 return -EINVAL;
8774
8775 if (!!(topt->flags->val & topt->opt->bit) != val) {
8776 mutex_lock(&trace_types_lock);
8777 ret = __set_tracer_option(topt->tr, topt->flags,
8778 topt->opt, !val);
8779 mutex_unlock(&trace_types_lock);
8780 if (ret)
8781 return ret;
8782 }
8783
8784 *ppos += cnt;
8785
8786 return cnt;
8787 }
8788
8789 static int tracing_open_options(struct inode *inode, struct file *filp)
8790 {
8791 struct trace_option_dentry *topt = inode->i_private;
8792 int ret;
8793
8794 ret = tracing_check_open_get_tr(topt->tr);
8795 if (ret)
8796 return ret;
8797
8798 filp->private_data = inode->i_private;
8799 return 0;
8800 }
8801
8802 static int tracing_release_options(struct inode *inode, struct file *file)
8803 {
8804 struct trace_option_dentry *topt = file->private_data;
8805
8806 trace_array_put(topt->tr);
8807 return 0;
8808 }
8809
8810 static const struct file_operations trace_options_fops = {
8811 .open = tracing_open_options,
8812 .read = trace_options_read,
8813 .write = trace_options_write,
8814 .llseek = generic_file_llseek,
8815 .release = tracing_release_options,
8816 };
8817
8818 /*
8819 * In order to pass in both the trace_array descriptor as well as the index
8820 * to the flag that the trace option file represents, the trace_array
8821 * has a character array of trace_flags_index[], which holds the index
8822 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8823 * The address of this character array is passed to the flag option file
8824 * read/write callbacks.
8825 *
8826 * In order to extract both the index and the trace_array descriptor,
8827 * get_tr_index() uses the following algorithm.
8828 *
8829 * idx = *ptr;
8830 *
8831 * As the pointer itself contains the address of the index (remember
8832 * index[1] == 1).
8833 *
8834 * Then to get the trace_array descriptor, by subtracting that index
8835 * from the ptr, we get to the start of the index itself.
8836 *
8837 * ptr - idx == &index[0]
8838 *
8839 * Then a simple container_of() from that pointer gets us to the
8840 * trace_array descriptor.
8841 */
8842 static void get_tr_index(void *data, struct trace_array **ptr,
8843 unsigned int *pindex)
8844 {
8845 *pindex = *(unsigned char *)data;
8846
8847 *ptr = container_of(data - *pindex, struct trace_array,
8848 trace_flags_index);
8849 }
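/*
 * Worked example of the arithmetic above: if data points at
 * tr->trace_flags_index[5], then *pindex == 5 and (data - 5) is
 * &tr->trace_flags_index[0], from which container_of() recovers tr.
 */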
8850
8851 static ssize_t
8852 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8853 loff_t *ppos)
8854 {
8855 void *tr_index = filp->private_data;
8856 struct trace_array *tr;
8857 unsigned int index;
8858 char *buf;
8859
8860 get_tr_index(tr_index, &tr, &index);
8861
8862 if (tr->trace_flags & (1 << index))
8863 buf = "1\n";
8864 else
8865 buf = "0\n";
8866
8867 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8868 }
8869
8870 static ssize_t
8871 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8872 loff_t *ppos)
8873 {
8874 void *tr_index = filp->private_data;
8875 struct trace_array *tr;
8876 unsigned int index;
8877 unsigned long val;
8878 int ret;
8879
8880 get_tr_index(tr_index, &tr, &index);
8881
8882 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8883 if (ret)
8884 return ret;
8885
8886 if (val != 0 && val != 1)
8887 return -EINVAL;
8888
8889 mutex_lock(&event_mutex);
8890 mutex_lock(&trace_types_lock);
8891 ret = set_tracer_flag(tr, 1 << index, val);
8892 mutex_unlock(&trace_types_lock);
8893 mutex_unlock(&event_mutex);
8894
8895 if (ret < 0)
8896 return ret;
8897
8898 *ppos += cnt;
8899
8900 return cnt;
8901 }
8902
8903 static const struct file_operations trace_options_core_fops = {
8904 .open = tracing_open_generic,
8905 .read = trace_options_core_read,
8906 .write = trace_options_core_write,
8907 .llseek = generic_file_llseek,
8908 };
8909
8910 struct dentry *trace_create_file(const char *name,
8911 umode_t mode,
8912 struct dentry *parent,
8913 void *data,
8914 const struct file_operations *fops)
8915 {
8916 struct dentry *ret;
8917
8918 ret = tracefs_create_file(name, mode, parent, data, fops);
8919 if (!ret)
8920 pr_warn("Could not create tracefs '%s' entry\n", name);
8921
8922 return ret;
8923 }
8924
8925
8926 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8927 {
8928 struct dentry *d_tracer;
8929
8930 if (tr->options)
8931 return tr->options;
8932
8933 d_tracer = tracing_get_dentry(tr);
8934 if (IS_ERR(d_tracer))
8935 return NULL;
8936
8937 tr->options = tracefs_create_dir("options", d_tracer);
8938 if (!tr->options) {
8939 pr_warn("Could not create tracefs directory 'options'\n");
8940 return NULL;
8941 }
8942
8943 return tr->options;
8944 }
8945
8946 static void
8947 create_trace_option_file(struct trace_array *tr,
8948 struct trace_option_dentry *topt,
8949 struct tracer_flags *flags,
8950 struct tracer_opt *opt)
8951 {
8952 struct dentry *t_options;
8953
8954 t_options = trace_options_init_dentry(tr);
8955 if (!t_options)
8956 return;
8957
8958 topt->flags = flags;
8959 topt->opt = opt;
8960 topt->tr = tr;
8961
8962 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8963 &trace_options_fops);
8964
8965 }
8966
8967 static void
8968 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8969 {
8970 struct trace_option_dentry *topts;
8971 struct trace_options *tr_topts;
8972 struct tracer_flags *flags;
8973 struct tracer_opt *opts;
8974 int cnt;
8975 int i;
8976
8977 if (!tracer)
8978 return;
8979
8980 flags = tracer->flags;
8981
8982 if (!flags || !flags->opts)
8983 return;
8984
8985 /*
8986 * If this is an instance, only create flags for tracers
8987 * the instance may have.
8988 */
8989 if (!trace_ok_for_array(tracer, tr))
8990 return;
8991
8992 for (i = 0; i < tr->nr_topts; i++) {
8993 /* Make sure there are no duplicate flags. */
8994 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8995 return;
8996 }
8997
8998 opts = flags->opts;
8999
9000 for (cnt = 0; opts[cnt].name; cnt++)
9001 ;
9002
9003 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9004 if (!topts)
9005 return;
9006
9007 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9008 GFP_KERNEL);
9009 if (!tr_topts) {
9010 kfree(topts);
9011 return;
9012 }
9013
9014 tr->topts = tr_topts;
9015 tr->topts[tr->nr_topts].tracer = tracer;
9016 tr->topts[tr->nr_topts].topts = topts;
9017 tr->nr_topts++;
9018
9019 for (cnt = 0; opts[cnt].name; cnt++) {
9020 create_trace_option_file(tr, &topts[cnt], flags,
9021 &opts[cnt]);
9022 MEM_FAIL(topts[cnt].entry == NULL,
9023 "Failed to create trace option: %s",
9024 opts[cnt].name);
9025 }
9026 }
9027
9028 static struct dentry *
9029 create_trace_option_core_file(struct trace_array *tr,
9030 const char *option, long index)
9031 {
9032 struct dentry *t_options;
9033
9034 t_options = trace_options_init_dentry(tr);
9035 if (!t_options)
9036 return NULL;
9037
9038 return trace_create_file(option, 0644, t_options,
9039 (void *)&tr->trace_flags_index[index],
9040 &trace_options_core_fops);
9041 }
9042
9043 static void create_trace_options_dir(struct trace_array *tr)
9044 {
9045 struct dentry *t_options;
9046 bool top_level = tr == &global_trace;
9047 int i;
9048
9049 t_options = trace_options_init_dentry(tr);
9050 if (!t_options)
9051 return;
9052
9053 for (i = 0; trace_options[i]; i++) {
9054 if (top_level ||
9055 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9056 create_trace_option_core_file(tr, trace_options[i], i);
9057 }
9058 }
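/*
 * Illustrative result of the setup above, assuming tracefs is mounted at
 * /sys/kernel/tracing: each core flag gets a boolean file under options/,
 * e.g.
 *
 *	cat options/sym-offset
 *	echo 1 > options/sym-offset
 */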
9059
9060 static ssize_t
9061 rb_simple_read(struct file *filp, char __user *ubuf,
9062 size_t cnt, loff_t *ppos)
9063 {
9064 struct trace_array *tr = filp->private_data;
9065 char buf[64];
9066 int r;
9067
9068 r = tracer_tracing_is_on(tr);
9069 r = sprintf(buf, "%d\n", r);
9070
9071 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9072 }
9073
9074 static ssize_t
9075 rb_simple_write(struct file *filp, const char __user *ubuf,
9076 size_t cnt, loff_t *ppos)
9077 {
9078 struct trace_array *tr = filp->private_data;
9079 struct trace_buffer *buffer = tr->array_buffer.buffer;
9080 unsigned long val;
9081 int ret;
9082
9083 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9084 if (ret)
9085 return ret;
9086
9087 if (buffer) {
9088 mutex_lock(&trace_types_lock);
9089 if (!!val == tracer_tracing_is_on(tr)) {
9090 val = 0; /* do nothing */
9091 } else if (val) {
9092 tracer_tracing_on(tr);
9093 if (tr->current_trace->start)
9094 tr->current_trace->start(tr);
9095 } else {
9096 tracer_tracing_off(tr);
9097 if (tr->current_trace->stop)
9098 tr->current_trace->stop(tr);
9099 /* Wake up any waiters */
9100 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9101 }
9102 mutex_unlock(&trace_types_lock);
9103 }
9104
9105 (*ppos)++;
9106
9107 return cnt;
9108 }
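/*
 * Illustrative usage: this write handler backs the per-instance
 * tracing_on file (tracefs assumed at /sys/kernel/tracing), so the writes
 * below stop and restart recording without touching the current tracer
 * selection.
 *
 *	echo 0 > tracing_on
 *	echo 1 > tracing_on
 */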
9109
9110 static const struct file_operations rb_simple_fops = {
9111 .open = tracing_open_generic_tr,
9112 .read = rb_simple_read,
9113 .write = rb_simple_write,
9114 .release = tracing_release_generic_tr,
9115 .llseek = default_llseek,
9116 };
9117
9118 static ssize_t
9119 buffer_percent_read(struct file *filp, char __user *ubuf,
9120 size_t cnt, loff_t *ppos)
9121 {
9122 struct trace_array *tr = filp->private_data;
9123 char buf[64];
9124 int r;
9125
9126 r = tr->buffer_percent;
9127 r = sprintf(buf, "%d\n", r);
9128
9129 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9130 }
9131
9132 static ssize_t
9133 buffer_percent_write(struct file *filp, const char __user *ubuf,
9134 size_t cnt, loff_t *ppos)
9135 {
9136 struct trace_array *tr = filp->private_data;
9137 unsigned long val;
9138 int ret;
9139
9140 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9141 if (ret)
9142 return ret;
9143
9144 if (val > 100)
9145 return -EINVAL;
9146
9147 tr->buffer_percent = val;
9148
9149 (*ppos)++;
9150
9151 return cnt;
9152 }
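/*
 * Illustrative usage of the handler above: buffer_percent sets how full a
 * per-CPU buffer must be before a blocked reader (see the wait_on_pipe()
 * call in tracing_buffers_splice_read()) is woken.
 *
 *	echo 0   > buffer_percent	# wake readers as soon as data arrives
 *	echo 50  > buffer_percent	# wake when buffers are half full
 *	echo 100 > buffer_percent	# wake only when buffers are full
 */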
9153
9154 static const struct file_operations buffer_percent_fops = {
9155 .open = tracing_open_generic_tr,
9156 .read = buffer_percent_read,
9157 .write = buffer_percent_write,
9158 .release = tracing_release_generic_tr,
9159 .llseek = default_llseek,
9160 };
9161
9162 static struct dentry *trace_instance_dir;
9163
9164 static void
9165 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9166
9167 static int
9168 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9169 {
9170 enum ring_buffer_flags rb_flags;
9171
9172 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9173
9174 buf->tr = tr;
9175
9176 buf->buffer = ring_buffer_alloc(size, rb_flags);
9177 if (!buf->buffer)
9178 return -ENOMEM;
9179
9180 buf->data = alloc_percpu(struct trace_array_cpu);
9181 if (!buf->data) {
9182 ring_buffer_free(buf->buffer);
9183 buf->buffer = NULL;
9184 return -ENOMEM;
9185 }
9186
9187 /* Allocate the first page for all buffers */
9188 set_buffer_entries(&tr->array_buffer,
9189 ring_buffer_size(tr->array_buffer.buffer, 0));
9190
9191 return 0;
9192 }
9193
9194 static int allocate_trace_buffers(struct trace_array *tr, int size)
9195 {
9196 int ret;
9197
9198 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9199 if (ret)
9200 return ret;
9201
9202 #ifdef CONFIG_TRACER_MAX_TRACE
9203 ret = allocate_trace_buffer(tr, &tr->max_buffer,
9204 allocate_snapshot ? size : 1);
9205 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9206 ring_buffer_free(tr->array_buffer.buffer);
9207 tr->array_buffer.buffer = NULL;
9208 free_percpu(tr->array_buffer.data);
9209 tr->array_buffer.data = NULL;
9210 return -ENOMEM;
9211 }
9212 tr->allocated_snapshot = allocate_snapshot;
9213
9214 /*
9215 * Only the top level trace array gets its snapshot allocated
9216 * from the kernel command line.
9217 */
9218 allocate_snapshot = false;
9219 #endif
9220
9221 return 0;
9222 }
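
/*
 * Note on the sizes chosen above: with CONFIG_TRACER_MAX_TRACE the
 * max_buffer is only needed once a snapshot or latency tracer swaps
 * buffers, so it is allocated at the minimum size (1) unless a
 * snapshot was requested on the kernel command line -- and, as the
 * comment above says, that only ever applies to the top level
 * trace array.
 */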
9223
9224 static void free_trace_buffer(struct array_buffer *buf)
9225 {
9226 if (buf->buffer) {
9227 ring_buffer_free(buf->buffer);
9228 buf->buffer = NULL;
9229 free_percpu(buf->data);
9230 buf->data = NULL;
9231 }
9232 }
9233
9234 static void free_trace_buffers(struct trace_array *tr)
9235 {
9236 if (!tr)
9237 return;
9238
9239 free_trace_buffer(&tr->array_buffer);
9240
9241 #ifdef CONFIG_TRACER_MAX_TRACE
9242 free_trace_buffer(&tr->max_buffer);
9243 #endif
9244 }
9245
9246 static void init_trace_flags_index(struct trace_array *tr)
9247 {
9248 int i;
9249
9250 /* Used by the trace options files */
9251 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9252 tr->trace_flags_index[i] = i;
9253 }
9254
9255 static void __update_tracer_options(struct trace_array *tr)
9256 {
9257 struct tracer *t;
9258
9259 for (t = trace_types; t; t = t->next)
9260 add_tracer_options(tr, t);
9261 }
9262
9263 static void update_tracer_options(struct trace_array *tr)
9264 {
9265 mutex_lock(&trace_types_lock);
9266 tracer_options_updated = true;
9267 __update_tracer_options(tr);
9268 mutex_unlock(&trace_types_lock);
9269 }
9270
9271 /* Must have trace_types_lock held */
9272 struct trace_array *trace_array_find(const char *instance)
9273 {
9274 struct trace_array *tr, *found = NULL;
9275
9276 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9277 if (tr->name && strcmp(tr->name, instance) == 0) {
9278 found = tr;
9279 break;
9280 }
9281 }
9282
9283 return found;
9284 }
9285
9286 struct trace_array *trace_array_find_get(const char *instance)
9287 {
9288 struct trace_array *tr;
9289
9290 mutex_lock(&trace_types_lock);
9291 tr = trace_array_find(instance);
9292 if (tr)
9293 tr->ref++;
9294 mutex_unlock(&trace_types_lock);
9295
9296 return tr;
9297 }
9298
9299 static int trace_array_create_dir(struct trace_array *tr)
9300 {
9301 int ret;
9302
9303 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9304 if (!tr->dir)
9305 return -EINVAL;
9306
9307 ret = event_trace_add_tracer(tr->dir, tr);
9308 if (ret) {
9309 tracefs_remove(tr->dir);
9310 return ret;
9311 }
9312
9313 init_tracer_tracefs(tr, tr->dir);
9314 __update_tracer_options(tr);
9315
9316 return ret;
9317 }
9318
9319 static struct trace_array *trace_array_create(const char *name)
9320 {
9321 struct trace_array *tr;
9322 int ret;
9323
9324 ret = -ENOMEM;
9325 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9326 if (!tr)
9327 return ERR_PTR(ret);
9328
9329 tr->name = kstrdup(name, GFP_KERNEL);
9330 if (!tr->name)
9331 goto out_free_tr;
9332
9333 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9334 goto out_free_tr;
9335
9336 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9337
9338 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9339
9340 raw_spin_lock_init(&tr->start_lock);
9341
9342 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9343
9344 tr->current_trace = &nop_trace;
9345
9346 INIT_LIST_HEAD(&tr->systems);
9347 INIT_LIST_HEAD(&tr->events);
9348 INIT_LIST_HEAD(&tr->hist_vars);
9349 INIT_LIST_HEAD(&tr->err_log);
9350
9351 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9352 goto out_free_tr;
9353
9354 if (ftrace_allocate_ftrace_ops(tr) < 0)
9355 goto out_free_tr;
9356
9357 ftrace_init_trace_array(tr);
9358
9359 init_trace_flags_index(tr);
9360
9361 if (trace_instance_dir) {
9362 ret = trace_array_create_dir(tr);
9363 if (ret)
9364 goto out_free_tr;
9365 } else
9366 __trace_early_add_events(tr);
9367
9368 list_add(&tr->list, &ftrace_trace_arrays);
9369
9370 tr->ref++;
9371
9372 return tr;
9373
9374 out_free_tr:
9375 ftrace_free_ftrace_ops(tr);
9376 free_trace_buffers(tr);
9377 free_cpumask_var(tr->tracing_cpumask);
9378 kfree(tr->name);
9379 kfree(tr);
9380
9381 return ERR_PTR(ret);
9382 }
9383
9384 static int instance_mkdir(const char *name)
9385 {
9386 struct trace_array *tr;
9387 int ret;
9388
9389 mutex_lock(&event_mutex);
9390 mutex_lock(&trace_types_lock);
9391
9392 ret = -EEXIST;
9393 if (trace_array_find(name))
9394 goto out_unlock;
9395
9396 tr = trace_array_create(name);
9397
9398 ret = PTR_ERR_OR_ZERO(tr);
9399
9400 out_unlock:
9401 mutex_unlock(&trace_types_lock);
9402 mutex_unlock(&event_mutex);
9403 return ret;
9404 }
9405
9406 /**
9407 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9408 * @name: The name of the trace array to be looked up/created.
9409 *
9410  * Returns a pointer to the trace array with the given name, or
9411  * NULL if it cannot be created.
9412 *
9413 * NOTE: This function increments the reference counter associated with the
9414 * trace array returned. This makes sure it cannot be freed while in use.
9415 * Use trace_array_put() once the trace array is no longer needed.
9416 * If the trace_array is to be freed, trace_array_destroy() needs to
9417 * be called after the trace_array_put(), or simply let user space delete
9418 * it from the tracefs instances directory. But until the
9419  * trace_array_put() is called, user space cannot delete it.
9420 *
9421 */
9422 struct trace_array *trace_array_get_by_name(const char *name)
9423 {
9424 struct trace_array *tr;
9425
9426 mutex_lock(&event_mutex);
9427 mutex_lock(&trace_types_lock);
9428
9429 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9430 if (tr->name && strcmp(tr->name, name) == 0)
9431 goto out_unlock;
9432 }
9433
9434 tr = trace_array_create(name);
9435
9436 if (IS_ERR(tr))
9437 tr = NULL;
9438 out_unlock:
9439 if (tr)
9440 tr->ref++;
9441
9442 mutex_unlock(&trace_types_lock);
9443 mutex_unlock(&event_mutex);
9444 return tr;
9445 }
9446 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
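
/*
 * A minimal sketch of how a kernel module might use this interface
 * (the instance and event names below are made-up examples, loosely
 * modeled on samples/ftrace/sample-trace-array.c):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my-instance");
 *	if (!tr)
 *		return -ENOMEM;
 *
 *	trace_array_set_clr_event(tr, "sched", "sched_switch", true);
 *
 *	... use the instance ...
 *
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);
 */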
9447
9448 static int __remove_instance(struct trace_array *tr)
9449 {
9450 int i;
9451
9452 /* Reference counter for a newly created trace array = 1. */
9453 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9454 return -EBUSY;
9455
9456 list_del(&tr->list);
9457
9458 /* Disable all the flags that were enabled coming in */
9459 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9460 if ((1 << i) & ZEROED_TRACE_FLAGS)
9461 set_tracer_flag(tr, 1 << i, 0);
9462 }
9463
9464 tracing_set_nop(tr);
9465 clear_ftrace_function_probes(tr);
9466 event_trace_del_tracer(tr);
9467 ftrace_clear_pids(tr);
9468 ftrace_destroy_function_files(tr);
9469 tracefs_remove(tr->dir);
9470 free_percpu(tr->last_func_repeats);
9471 free_trace_buffers(tr);
9472 clear_tracing_err_log(tr);
9473
9474 for (i = 0; i < tr->nr_topts; i++) {
9475 kfree(tr->topts[i].topts);
9476 }
9477 kfree(tr->topts);
9478
9479 free_cpumask_var(tr->tracing_cpumask);
9480 kfree(tr->name);
9481 kfree(tr);
9482
9483 return 0;
9484 }
9485
9486 int trace_array_destroy(struct trace_array *this_tr)
9487 {
9488 struct trace_array *tr;
9489 int ret;
9490
9491 if (!this_tr)
9492 return -EINVAL;
9493
9494 mutex_lock(&event_mutex);
9495 mutex_lock(&trace_types_lock);
9496
9497 ret = -ENODEV;
9498
9499 	/* Make sure the trace array exists before destroying it. */
9500 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9501 if (tr == this_tr) {
9502 ret = __remove_instance(tr);
9503 break;
9504 }
9505 }
9506
9507 mutex_unlock(&trace_types_lock);
9508 mutex_unlock(&event_mutex);
9509
9510 return ret;
9511 }
9512 EXPORT_SYMBOL_GPL(trace_array_destroy);
9513
9514 static int instance_rmdir(const char *name)
9515 {
9516 struct trace_array *tr;
9517 int ret;
9518
9519 mutex_lock(&event_mutex);
9520 mutex_lock(&trace_types_lock);
9521
9522 ret = -ENODEV;
9523 tr = trace_array_find(name);
9524 if (tr)
9525 ret = __remove_instance(tr);
9526
9527 mutex_unlock(&trace_types_lock);
9528 mutex_unlock(&event_mutex);
9529
9530 return ret;
9531 }
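
/*
 * instance_mkdir() and instance_rmdir() back the tracefs "instances"
 * directory registered below; an illustrative session (usual tracefs
 * mount point assumed):
 *
 *	mkdir /sys/kernel/tracing/instances/foo    # -> instance_mkdir("foo")
 *	echo 1 > /sys/kernel/tracing/instances/foo/tracing_on
 *	rmdir /sys/kernel/tracing/instances/foo    # -> instance_rmdir("foo")
 *
 * The rmdir fails with -EBUSY while the instance still holds extra
 * references or has active users (see __remove_instance() above).
 */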
9532
9533 static __init void create_trace_instances(struct dentry *d_tracer)
9534 {
9535 struct trace_array *tr;
9536
9537 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9538 instance_mkdir,
9539 instance_rmdir);
9540 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9541 return;
9542
9543 mutex_lock(&event_mutex);
9544 mutex_lock(&trace_types_lock);
9545
9546 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9547 if (!tr->name)
9548 continue;
9549 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9550 "Failed to create instance directory\n"))
9551 break;
9552 }
9553
9554 mutex_unlock(&trace_types_lock);
9555 mutex_unlock(&event_mutex);
9556 }
9557
9558 static void
9559 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9560 {
9561 struct trace_event_file *file;
9562 int cpu;
9563
9564 trace_create_file("available_tracers", 0444, d_tracer,
9565 tr, &show_traces_fops);
9566
9567 trace_create_file("current_tracer", 0644, d_tracer,
9568 tr, &set_tracer_fops);
9569
9570 trace_create_file("tracing_cpumask", 0644, d_tracer,
9571 tr, &tracing_cpumask_fops);
9572
9573 trace_create_file("trace_options", 0644, d_tracer,
9574 tr, &tracing_iter_fops);
9575
9576 trace_create_file("trace", 0644, d_tracer,
9577 tr, &tracing_fops);
9578
9579 trace_create_file("trace_pipe", 0444, d_tracer,
9580 tr, &tracing_pipe_fops);
9581
9582 trace_create_file("buffer_size_kb", 0644, d_tracer,
9583 tr, &tracing_entries_fops);
9584
9585 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9586 tr, &tracing_total_entries_fops);
9587
9588 trace_create_file("free_buffer", 0200, d_tracer,
9589 tr, &tracing_free_buffer_fops);
9590
9591 trace_create_file("trace_marker", 0220, d_tracer,
9592 tr, &tracing_mark_fops);
9593
9594 file = __find_event_file(tr, "ftrace", "print");
9595 if (file && file->dir)
9596 trace_create_file("trigger", 0644, file->dir, file,
9597 &event_trigger_fops);
9598 tr->trace_marker_file = file;
9599
9600 trace_create_file("trace_marker_raw", 0220, d_tracer,
9601 tr, &tracing_mark_raw_fops);
9602
9603 trace_create_file("trace_clock", 0644, d_tracer, tr,
9604 &trace_clock_fops);
9605
9606 trace_create_file("tracing_on", 0644, d_tracer,
9607 tr, &rb_simple_fops);
9608
9609 trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9610 &trace_time_stamp_mode_fops);
9611
9612 tr->buffer_percent = 50;
9613
9614 trace_create_file("buffer_percent", 0640, d_tracer,
9615 tr, &buffer_percent_fops);
9616
9617 create_trace_options_dir(tr);
9618
9619 #ifdef CONFIG_TRACER_MAX_TRACE
9620 trace_create_maxlat_file(tr, d_tracer);
9621 #endif
9622
9623 if (ftrace_create_function_files(tr, d_tracer))
9624 MEM_FAIL(1, "Could not allocate function filter files");
9625
9626 #ifdef CONFIG_TRACER_SNAPSHOT
9627 trace_create_file("snapshot", 0644, d_tracer,
9628 tr, &snapshot_fops);
9629 #endif
9630
9631 trace_create_file("error_log", 0644, d_tracer,
9632 tr, &tracing_err_log_fops);
9633
9634 for_each_tracing_cpu(cpu)
9635 tracing_init_tracefs_percpu(tr, cpu);
9636
9637 ftrace_init_tracefs(tr, d_tracer);
9638 }
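
/*
 * init_tracer_tracefs() is used both for the top level tracing
 * directory (called with a NULL dentry from tracer_init_tracefs())
 * and for every instance created under "instances/", so each instance
 * gets its own copy of the control files created above.
 */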
9639
9640 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9641 {
9642 struct vfsmount *mnt;
9643 struct file_system_type *type;
9644
9645 /*
9646 * To maintain backward compatibility for tools that mount
9647 * debugfs to get to the tracing facility, tracefs is automatically
9648 * mounted to the debugfs/tracing directory.
9649 */
9650 type = get_fs_type("tracefs");
9651 if (!type)
9652 return NULL;
9653 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9654 put_filesystem(type);
9655 if (IS_ERR(mnt))
9656 return NULL;
9657 mntget(mnt);
9658
9659 return mnt;
9660 }
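
/*
 * Illustrative effect of the automount installed by
 * tracing_init_dentry() below: merely accessing the legacy debugfs
 * path mounts tracefs on top of it, e.g.
 *
 *	ls /sys/kernel/debug/tracing	# triggers trace_automount()
 *
 * so older tools that only know about debugfs keep working.
 */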
9661
9662 /**
9663 * tracing_init_dentry - initialize top level trace array
9664 *
9665 * This is called when creating files or directories in the tracing
9666 * directory. It is called via fs_initcall() by any of the boot up code
9667 * and expects to return the dentry of the top level tracing directory.
9668 */
9669 int tracing_init_dentry(void)
9670 {
9671 struct trace_array *tr = &global_trace;
9672
9673 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9674 pr_warn("Tracing disabled due to lockdown\n");
9675 return -EPERM;
9676 }
9677
9678 /* The top level trace array uses NULL as parent */
9679 if (tr->dir)
9680 return 0;
9681
9682 if (WARN_ON(!tracefs_initialized()))
9683 return -ENODEV;
9684
9685 /*
9686 * As there may still be users that expect the tracing
9687 * files to exist in debugfs/tracing, we must automount
9688 * the tracefs file system there, so older tools still
9689 * work with the newer kernel.
9690 */
9691 tr->dir = debugfs_create_automount("tracing", NULL,
9692 trace_automount, NULL);
9693
9694 return 0;
9695 }
9696
9697 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9698 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9699
9700 static struct workqueue_struct *eval_map_wq __initdata;
9701 static struct work_struct eval_map_work __initdata;
9702
9703 static void __init eval_map_work_func(struct work_struct *work)
9704 {
9705 int len;
9706
9707 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9708 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9709 }
9710
9711 static int __init trace_eval_init(void)
9712 {
9713 INIT_WORK(&eval_map_work, eval_map_work_func);
9714
9715 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9716 if (!eval_map_wq) {
9717 pr_err("Unable to allocate eval_map_wq\n");
9718 /* Do work here */
9719 eval_map_work_func(&eval_map_work);
9720 return -ENOMEM;
9721 }
9722
9723 queue_work(eval_map_wq, &eval_map_work);
9724 return 0;
9725 }
9726
9727 static int __init trace_eval_sync(void)
9728 {
9729 /* Make sure the eval map updates are finished */
9730 if (eval_map_wq)
9731 destroy_workqueue(eval_map_wq);
9732 return 0;
9733 }
9734
9735 late_initcall_sync(trace_eval_sync);
9736
9737
9738 #ifdef CONFIG_MODULES
9739 static void trace_module_add_evals(struct module *mod)
9740 {
9741 if (!mod->num_trace_evals)
9742 return;
9743
9744 /*
9745 	 * Modules with bad taint do not have events created, so do
9746 	 * not bother with enums either.
9747 */
9748 if (trace_module_has_bad_taint(mod))
9749 return;
9750
9751 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9752 }
9753
9754 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9755 static void trace_module_remove_evals(struct module *mod)
9756 {
9757 union trace_eval_map_item *map;
9758 union trace_eval_map_item **last = &trace_eval_maps;
9759
9760 if (!mod->num_trace_evals)
9761 return;
9762
9763 mutex_lock(&trace_eval_mutex);
9764
9765 map = trace_eval_maps;
9766
9767 while (map) {
9768 if (map->head.mod == mod)
9769 break;
9770 map = trace_eval_jmp_to_tail(map);
9771 last = &map->tail.next;
9772 map = map->tail.next;
9773 }
9774 if (!map)
9775 goto out;
9776
9777 *last = trace_eval_jmp_to_tail(map)->tail.next;
9778 kfree(map);
9779 out:
9780 mutex_unlock(&trace_eval_mutex);
9781 }
9782 #else
9783 static inline void trace_module_remove_evals(struct module *mod) { }
9784 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9785
9786 static int trace_module_notify(struct notifier_block *self,
9787 unsigned long val, void *data)
9788 {
9789 struct module *mod = data;
9790
9791 switch (val) {
9792 case MODULE_STATE_COMING:
9793 trace_module_add_evals(mod);
9794 break;
9795 case MODULE_STATE_GOING:
9796 trace_module_remove_evals(mod);
9797 break;
9798 }
9799
9800 return NOTIFY_OK;
9801 }
9802
9803 static struct notifier_block trace_module_nb = {
9804 .notifier_call = trace_module_notify,
9805 .priority = 0,
9806 };
9807 #endif /* CONFIG_MODULES */
9808
9809 static __init int tracer_init_tracefs(void)
9810 {
9811 int ret;
9812
9813 trace_access_lock_init();
9814
9815 ret = tracing_init_dentry();
9816 if (ret)
9817 return 0;
9818
9819 event_trace_init();
9820
9821 init_tracer_tracefs(&global_trace, NULL);
9822 ftrace_init_tracefs_toplevel(&global_trace, NULL);
9823
9824 trace_create_file("tracing_thresh", 0644, NULL,
9825 &global_trace, &tracing_thresh_fops);
9826
9827 trace_create_file("README", 0444, NULL,
9828 NULL, &tracing_readme_fops);
9829
9830 trace_create_file("saved_cmdlines", 0444, NULL,
9831 NULL, &tracing_saved_cmdlines_fops);
9832
9833 trace_create_file("saved_cmdlines_size", 0644, NULL,
9834 NULL, &tracing_saved_cmdlines_size_fops);
9835
9836 trace_create_file("saved_tgids", 0444, NULL,
9837 NULL, &tracing_saved_tgids_fops);
9838
9839 trace_eval_init();
9840
9841 trace_create_eval_file(NULL);
9842
9843 #ifdef CONFIG_MODULES
9844 register_module_notifier(&trace_module_nb);
9845 #endif
9846
9847 #ifdef CONFIG_DYNAMIC_FTRACE
9848 trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9849 NULL, &tracing_dyn_info_fops);
9850 #endif
9851
9852 create_trace_instances(NULL);
9853
9854 update_tracer_options(&global_trace);
9855
9856 return 0;
9857 }
9858
9859 fs_initcall(tracer_init_tracefs);
9860
9861 static int trace_panic_handler(struct notifier_block *this,
9862 unsigned long event, void *unused)
9863 {
9864 bool ftrace_check = false;
9865
9866 trace_android_vh_ftrace_oops_enter(&ftrace_check);
9867
9868 if (ftrace_check)
9869 return NOTIFY_OK;
9870
9871 if (ftrace_dump_on_oops)
9872 ftrace_dump(ftrace_dump_on_oops);
9873
9874 trace_android_vh_ftrace_oops_exit(&ftrace_check);
9875 return NOTIFY_OK;
9876 }
9877
9878 static struct notifier_block trace_panic_notifier = {
9879 .notifier_call = trace_panic_handler,
9880 .next = NULL,
9881 .priority = 150 /* priority: INT_MAX >= x >= 0 */
9882 };
9883
9884 static int trace_die_handler(struct notifier_block *self,
9885 unsigned long val,
9886 void *data)
9887 {
9888 bool ftrace_check = false;
9889
9890 trace_android_vh_ftrace_oops_enter(&ftrace_check);
9891
9892 if (ftrace_check)
9893 return NOTIFY_OK;
9894
9895 switch (val) {
9896 case DIE_OOPS:
9897 if (ftrace_dump_on_oops)
9898 ftrace_dump(ftrace_dump_on_oops);
9899 break;
9900 default:
9901 break;
9902 }
9903
9904 trace_android_vh_ftrace_oops_exit(&ftrace_check);
9905 return NOTIFY_OK;
9906 }
9907
9908 static struct notifier_block trace_die_notifier = {
9909 .notifier_call = trace_die_handler,
9910 .priority = 200
9911 };
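
/*
 * Both notifiers above honor the ftrace_dump_on_oops setting, which
 * can be enabled via the kernel command line or the matching sysctl,
 * e.g.:
 *
 *	ftrace_dump_on_oops		# dump every CPU's buffer on oops
 *	ftrace_dump_on_oops=orig_cpu	# dump only the CPU that oopsed
 *
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */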
9912
9913 /*
9914  * printk is limited to a max of 1024 characters; we really don't need
9915  * it that big. Nothing should be printing 1000 characters anyway.
9916 */
9917 #define TRACE_MAX_PRINT 1000
9918
9919 /*
9920 * Define here KERN_TRACE so that we have one place to modify
9921 * it if we decide to change what log level the ftrace dump
9922 * should be at.
9923 */
9924 #define KERN_TRACE KERN_EMERG
9925
9926 void
9927 trace_printk_seq(struct trace_seq *s)
9928 {
9929 bool dump_printk = true;
9930
9931 /* Probably should print a warning here. */
9932 if (s->seq.len >= TRACE_MAX_PRINT)
9933 s->seq.len = TRACE_MAX_PRINT;
9934
9935 /*
9936 * More paranoid code. Although the buffer size is set to
9937 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9938 * an extra layer of protection.
9939 */
9940 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9941 s->seq.len = s->seq.size - 1;
9942
9943 /* should be zero ended, but we are paranoid. */
9944 s->buffer[s->seq.len] = 0;
9945
9946 trace_android_vh_ftrace_dump_buffer(s, &dump_printk);
9947 if (dump_printk)
9948 printk(KERN_TRACE "%s", s->buffer);
9949
9950 trace_seq_init(s);
9951 }
9952
9953 void trace_init_global_iter(struct trace_iterator *iter)
9954 {
9955 iter->tr = &global_trace;
9956 iter->trace = iter->tr->current_trace;
9957 iter->cpu_file = RING_BUFFER_ALL_CPUS;
9958 iter->array_buffer = &global_trace.array_buffer;
9959
9960 if (iter->trace && iter->trace->open)
9961 iter->trace->open(iter);
9962
9963 /* Annotate start of buffers if we had overruns */
9964 if (ring_buffer_overruns(iter->array_buffer->buffer))
9965 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9966
9967 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9968 if (trace_clocks[iter->tr->clock_id].in_ns)
9969 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9970
9971 	/* Cannot use kmalloc for iter.temp and iter.fmt */
9972 iter->temp = static_temp_buf;
9973 iter->temp_size = STATIC_TEMP_BUF_SIZE;
9974 iter->fmt = static_fmt_buf;
9975 iter->fmt_size = STATIC_FMT_BUF_SIZE;
9976 }
9977
9978 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9979 {
9980 /* use static because iter can be a bit big for the stack */
9981 static struct trace_iterator iter;
9982 static atomic_t dump_running;
9983 struct trace_array *tr = &global_trace;
9984 unsigned int old_userobj;
9985 unsigned long flags;
9986 int cnt = 0, cpu;
9987 bool ftrace_check = false;
9988 unsigned long size;
9989
9990 /* Only allow one dump user at a time. */
9991 if (atomic_inc_return(&dump_running) != 1) {
9992 atomic_dec(&dump_running);
9993 return;
9994 }
9995
9996 /*
9997 * Always turn off tracing when we dump.
9998 * We don't need to show trace output of what happens
9999 * between multiple crashes.
10000 *
10001 * If the user does a sysrq-z, then they can re-enable
10002 * tracing with echo 1 > tracing_on.
10003 */
10004 tracing_off();
10005
10006 local_irq_save(flags);
10007
10008 /* Simulate the iterator */
10009 trace_init_global_iter(&iter);
10010
10011 for_each_tracing_cpu(cpu) {
10012 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10013 size = ring_buffer_size(iter.array_buffer->buffer, cpu);
10014 trace_android_vh_ftrace_size_check(size, &ftrace_check);
10015 }
10016
10017 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10018
10019 /* don't look at user memory in panic mode */
10020 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10021
10022 if (ftrace_check)
10023 goto out_enable;
10024
10025 switch (oops_dump_mode) {
10026 case DUMP_ALL:
10027 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10028 break;
10029 case DUMP_ORIG:
10030 iter.cpu_file = raw_smp_processor_id();
10031 break;
10032 case DUMP_NONE:
10033 goto out_enable;
10034 default:
10035 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10036 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10037 }
10038
10039 printk(KERN_TRACE "Dumping ftrace buffer:\n");
10040
10041 /* Did function tracer already get disabled? */
10042 if (ftrace_is_dead()) {
10043 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10044 printk("# MAY BE MISSING FUNCTION EVENTS\n");
10045 }
10046
10047 /*
10048 	 * We need to stop all tracing on all CPUs to read
10049 	 * the next buffer. This is a bit expensive, but is
10050 	 * not done often. We dump all that we can read,
10051 * and then release the locks again.
10052 */
10053
10054 while (!trace_empty(&iter)) {
10055 ftrace_check = true;
10056
10057 if (!cnt)
10058 printk(KERN_TRACE "---------------------------------\n");
10059
10060 cnt++;
10061
10062 trace_iterator_reset(&iter);
10063 trace_android_vh_ftrace_format_check(&ftrace_check);
10064 if (ftrace_check)
10065 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10066
10067 if (trace_find_next_entry_inc(&iter) != NULL) {
10068 int ret;
10069
10070 ret = print_trace_line(&iter);
10071 if (ret != TRACE_TYPE_NO_CONSUME)
10072 trace_consume(&iter);
10073 }
10074 touch_nmi_watchdog();
10075
10076 trace_printk_seq(&iter.seq);
10077 }
10078
10079 if (!cnt)
10080 printk(KERN_TRACE " (ftrace buffer empty)\n");
10081 else
10082 printk(KERN_TRACE "---------------------------------\n");
10083
10084 out_enable:
10085 tr->trace_flags |= old_userobj;
10086
10087 for_each_tracing_cpu(cpu) {
10088 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10089 }
10090 atomic_dec(&dump_running);
10091 local_irq_restore(flags);
10092 }
10093 EXPORT_SYMBOL_GPL(ftrace_dump);
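
/*
 * ftrace_dump() may also be called directly from code under debug; a
 * minimal sketch, where "broken_invariant" is only a placeholder
 * condition:
 *
 *	if (WARN_ON_ONCE(broken_invariant))
 *		ftrace_dump(DUMP_ALL);
 *
 * (DUMP_ORIG would restrict the dump to the current CPU instead.)
 */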
10094
10095 #define WRITE_BUFSIZE 4096
10096
10097 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10098 size_t count, loff_t *ppos,
10099 int (*createfn)(const char *))
10100 {
10101 char *kbuf, *buf, *tmp;
10102 int ret = 0;
10103 size_t done = 0;
10104 size_t size;
10105
10106 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10107 if (!kbuf)
10108 return -ENOMEM;
10109
10110 while (done < count) {
10111 size = count - done;
10112
10113 if (size >= WRITE_BUFSIZE)
10114 size = WRITE_BUFSIZE - 1;
10115
10116 if (copy_from_user(kbuf, buffer + done, size)) {
10117 ret = -EFAULT;
10118 goto out;
10119 }
10120 kbuf[size] = '\0';
10121 buf = kbuf;
10122 do {
10123 tmp = strchr(buf, '\n');
10124 if (tmp) {
10125 *tmp = '\0';
10126 size = tmp - buf + 1;
10127 } else {
10128 size = strlen(buf);
10129 if (done + size < count) {
10130 if (buf != kbuf)
10131 break;
10132 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10133 pr_warn("Line length is too long: Should be less than %d\n",
10134 WRITE_BUFSIZE - 2);
10135 ret = -EINVAL;
10136 goto out;
10137 }
10138 }
10139 done += size;
10140
10141 /* Remove comments */
10142 tmp = strchr(buf, '#');
10143
10144 if (tmp)
10145 *tmp = '\0';
10146
10147 ret = createfn(buf);
10148 if (ret)
10149 goto out;
10150 buf += size;
10151
10152 } while (done < count);
10153 }
10154 ret = done;
10155
10156 out:
10157 kfree(kbuf);
10158
10159 return ret;
10160 }
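
/*
 * trace_parse_run_command() is the shared write handler behind the
 * dynamic event control files (kprobe_events and friends): it splits
 * the user buffer on newlines, strips '#' comments and hands each line
 * to @createfn.  An illustrative write that ends up here (the probe
 * name and target function are example values only):
 *
 *	echo 'p:my_open do_sys_openat2' >> /sys/kernel/tracing/kprobe_events
 */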
10161
10162 __init static int tracer_alloc_buffers(void)
10163 {
10164 int ring_buf_size;
10165 int ret = -ENOMEM;
10166
10167
10168 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10169 pr_warn("Tracing disabled due to lockdown\n");
10170 return -EPERM;
10171 }
10172
10173 /*
10174 * Make sure we don't accidentally add more trace options
10175 * than we have bits for.
10176 */
10177 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10178
10179 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10180 goto out;
10181
10182 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10183 goto out_free_buffer_mask;
10184
10185 /* Only allocate trace_printk buffers if a trace_printk exists */
10186 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10187 /* Must be called before global_trace.buffer is allocated */
10188 trace_printk_init_buffers();
10189
10190 /* To save memory, keep the ring buffer size to its minimum */
10191 if (ring_buffer_expanded)
10192 ring_buf_size = trace_buf_size;
10193 else
10194 ring_buf_size = 1;
10195
10196 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10197 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10198
10199 raw_spin_lock_init(&global_trace.start_lock);
10200
10201 /*
10202 	 * The prepare callback allocates some memory for the ring buffer. We
10203 * don't free the buffer if the CPU goes down. If we were to free
10204 * the buffer, then the user would lose any trace that was in the
10205 * buffer. The memory will be removed once the "instance" is removed.
10206 */
10207 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10208 "trace/RB:preapre", trace_rb_cpu_prepare,
10209 NULL);
10210 if (ret < 0)
10211 goto out_free_cpumask;
10212 /* Used for event triggers */
10213 ret = -ENOMEM;
10214 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10215 if (!temp_buffer)
10216 goto out_rm_hp_state;
10217
10218 if (trace_create_savedcmd() < 0)
10219 goto out_free_temp_buffer;
10220
10221 /* TODO: make the number of buffers hot pluggable with CPUS */
10222 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10223 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10224 goto out_free_savedcmd;
10225 }
10226
10227 if (global_trace.buffer_disabled)
10228 tracing_off();
10229
10230 if (trace_boot_clock) {
10231 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10232 if (ret < 0)
10233 pr_warn("Trace clock %s not defined, going back to default\n",
10234 trace_boot_clock);
10235 }
10236
10237 /*
10238 * register_tracer() might reference current_trace, so it
10239 * needs to be set before we register anything. This is
10240 * just a bootstrap of current_trace anyway.
10241 */
10242 global_trace.current_trace = &nop_trace;
10243
10244 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10245
10246 ftrace_init_global_array_ops(&global_trace);
10247
10248 init_trace_flags_index(&global_trace);
10249
10250 register_tracer(&nop_trace);
10251
10252 /* Function tracing may start here (via kernel command line) */
10253 init_function_trace();
10254
10255 /* All seems OK, enable tracing */
10256 tracing_disabled = 0;
10257
10258 atomic_notifier_chain_register(&panic_notifier_list,
10259 &trace_panic_notifier);
10260
10261 register_die_notifier(&trace_die_notifier);
10262
10263 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10264
10265 INIT_LIST_HEAD(&global_trace.systems);
10266 INIT_LIST_HEAD(&global_trace.events);
10267 INIT_LIST_HEAD(&global_trace.hist_vars);
10268 INIT_LIST_HEAD(&global_trace.err_log);
10269 list_add(&global_trace.list, &ftrace_trace_arrays);
10270
10271 apply_trace_boot_options();
10272
10273 register_snapshot_cmd();
10274
10275 test_can_verify();
10276
10277 return 0;
10278
10279 out_free_savedcmd:
10280 free_saved_cmdlines_buffer(savedcmd);
10281 out_free_temp_buffer:
10282 ring_buffer_free(temp_buffer);
10283 out_rm_hp_state:
10284 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10285 out_free_cpumask:
10286 free_cpumask_var(global_trace.tracing_cpumask);
10287 out_free_buffer_mask:
10288 free_cpumask_var(tracing_buffer_mask);
10289 out:
10290 return ret;
10291 }
10292
10293 void __init early_trace_init(void)
10294 {
10295 if (tracepoint_printk) {
10296 tracepoint_print_iter =
10297 kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10298 if (MEM_FAIL(!tracepoint_print_iter,
10299 "Failed to allocate trace iterator\n"))
10300 tracepoint_printk = 0;
10301 else
10302 static_key_enable(&tracepoint_printk_key.key);
10303 }
10304 tracer_alloc_buffers();
10305
10306 init_events();
10307 }
10308
10309 void __init trace_init(void)
10310 {
10311 trace_event_init();
10312 }
10313
10314 __init static void clear_boot_tracer(void)
10315 {
10316 /*
10317 	 * The default boot-up tracer name is stored in an init section
10318 	 * buffer. This function is called at late init. If we did not
10319 * find the boot tracer, then clear it out, to prevent
10320 * later registration from accessing the buffer that is
10321 * about to be freed.
10322 */
10323 if (!default_bootup_tracer)
10324 return;
10325
10326 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10327 default_bootup_tracer);
10328 default_bootup_tracer = NULL;
10329 }
10330
10331 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10332 __init static void tracing_set_default_clock(void)
10333 {
10334 /* sched_clock_stable() is determined in late_initcall */
10335 if (!trace_boot_clock && !sched_clock_stable()) {
10336 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10337 pr_warn("Can not set tracing clock due to lockdown\n");
10338 return;
10339 }
10340
10341 printk(KERN_WARNING
10342 "Unstable clock detected, switching default tracing clock to \"global\"\n"
10343 "If you want to keep using the local clock, then add:\n"
10344 " \"trace_clock=local\"\n"
10345 "on the kernel command line\n");
10346 tracing_set_clock(&global_trace, "global");
10347 }
10348 }
10349 #else
10350 static inline void tracing_set_default_clock(void) { }
10351 #endif
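
/*
 * The default-clock fallback above can be overridden from the kernel
 * command line, matching the hint printed by tracing_set_default_clock():
 *
 *	trace_clock=local	# keep the per-CPU local clock
 *	trace_clock=global	# force the globally ordered clock
 */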
10352
10353 __init static int late_trace_init(void)
10354 {
10355 if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10356 static_key_disable(&tracepoint_printk_key.key);
10357 tracepoint_printk = 0;
10358 }
10359
10360 tracing_set_default_clock();
10361 clear_boot_tracer();
10362 return 0;
10363 }
10364
10365 late_initcall_sync(late_trace_init);
10366