1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56 * On boot up, the ring buffer is set to the minimum size, so that
57 * we do not waste memory on systems that are not using tracing.
58 */
59 bool ring_buffer_expanded;
60
61 /*
62 * We need to change this state when a selftest is running.
63 * A selftest will lurk into the ring-buffer to count the
64 * entries inserted during the selftest, although concurrent
65 * insertions into the ring-buffer, such as trace_printk, could occur
66 * at the same time, giving false positive or negative results.
67 */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71 * If boot-time tracing including tracers/events via kernel cmdline
72 * is running, we do not want to run SELFTEST.
73 */
74 bool __read_mostly tracing_selftest_disabled;
75
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
78 {
79 if (!tracing_selftest_disabled) {
80 tracing_selftest_disabled = true;
81 pr_info("Ftrace startup test is disabled due to %s\n", reason);
82 }
83 }
84 #endif
85
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93 { }
94 };
95
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99 return 0;
100 }
101
102 /*
103 * To prevent the comm cache from being overwritten when no
104 * tracing is active, only save the comm when a trace event
105 * occurred.
106 */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108
109 /*
110 * Kill all tracing for good (never come back).
111 * It is initialized to 1 but will turn to zero if the initialization
112 * of the tracer is successful. But that is the only place that sets
113 * this back to zero.
114 */
115 static int tracing_disabled = 1;
116
117 cpumask_var_t __read_mostly tracing_buffer_mask;
118
119 /*
120 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121 *
122 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123 * is set, then ftrace_dump is called. This will output the contents
124 * of the ftrace buffers to the console. This is very useful for
125 * capturing traces that lead to crashes and outputting them to a
126 * serial console.
127 *
128 * It is off by default, but you can enable it either by specifying
129 * "ftrace_dump_on_oops" on the kernel command line, or by setting
130 * /proc/sys/kernel/ftrace_dump_on_oops
131 * Set 1 if you want to dump buffers of all CPUs
132 * Set 2 if you want to dump the buffer of the CPU that triggered oops
133 */
134
135 enum ftrace_dump_mode ftrace_dump_on_oops;
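/*
 * Example (illustrative, not part of the original source): given the boot
 * parameter parser below and the sysctl described above, either of these
 * enables the dump:
 *
 *	ftrace_dump_on_oops=orig_cpu			(kernel command line, DUMP_ORIG)
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops	(runtime, all CPUs)
 */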
136
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143 struct module *mod;
144 unsigned long length;
145 };
146
147 union trace_eval_map_item;
148
149 struct trace_eval_map_tail {
150 /*
151 * "end" is first and points to NULL as it must be different
152 * than "mod" or "eval_string"
153 */
154 union trace_eval_map_item *next;
155 const char *end; /* points to NULL */
156 };
157
158 static DEFINE_MUTEX(trace_eval_mutex);
159
160 /*
161 * The trace_eval_maps are saved in an array with two extra elements,
162 * one at the beginning, and one at the end. The beginning item contains
163 * the count of the saved maps (head.length), and the module they
164 * belong to if not built in (head.mod). The ending item contains a
165 * pointer to the next array of saved eval_map items.
166 */
167 union trace_eval_map_item {
168 struct trace_eval_map map;
169 struct trace_eval_map_head head;
170 struct trace_eval_map_tail tail;
171 };
172
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
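/*
 * Illustrative sketch (not from the original source): given the layout
 * described above -- [head][map 0]...[map N-1][tail] -- walking every saved
 * array could look roughly like this, where use_eval_map() is a hypothetical
 * consumer:
 *
 *	union trace_eval_map_item *ptr = trace_eval_maps;
 *
 *	while (ptr) {
 *		unsigned long i, len = ptr->head.length;
 *
 *		for (i = 0; i < len; i++)
 *			use_eval_map(&ptr[i + 1].map);
 *		ptr = ptr[len + 1].tail.next;
 *	}
 */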
175
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178 struct trace_buffer *buffer,
179 unsigned long flags, int pc);
180
181 #define MAX_TRACER_SIZE 100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184
185 static bool allocate_snapshot;
186
187 static int __init set_cmdline_ftrace(char *str)
188 {
189 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
190 default_bootup_tracer = bootup_tracer_buf;
191 /* We are using ftrace early, expand it */
192 ring_buffer_expanded = true;
193 return 1;
194 }
195 __setup("ftrace=", set_cmdline_ftrace);
196
197 static int __init set_ftrace_dump_on_oops(char *str)
198 {
199 if (*str++ != '=' || !*str) {
200 ftrace_dump_on_oops = DUMP_ALL;
201 return 1;
202 }
203
204 if (!strcmp("orig_cpu", str)) {
205 ftrace_dump_on_oops = DUMP_ORIG;
206 return 1;
207 }
208
209 return 0;
210 }
211 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
212
213 static int __init stop_trace_on_warning(char *str)
214 {
215 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
216 __disable_trace_on_warning = 1;
217 return 1;
218 }
219 __setup("traceoff_on_warning", stop_trace_on_warning);
220
221 static int __init boot_alloc_snapshot(char *str)
222 {
223 allocate_snapshot = true;
224 /* We also need the main ring buffer expanded */
225 ring_buffer_expanded = true;
226 return 1;
227 }
228 __setup("alloc_snapshot", boot_alloc_snapshot);
229
230
231 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
232
233 static int __init set_trace_boot_options(char *str)
234 {
235 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
236 return 1;
237 }
238 __setup("trace_options=", set_trace_boot_options);
239
240 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
241 static char *trace_boot_clock __initdata;
242
243 static int __init set_trace_boot_clock(char *str)
244 {
245 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
246 trace_boot_clock = trace_boot_clock_buf;
247 return 1;
248 }
249 __setup("trace_clock=", set_trace_boot_clock);
250
251 static int __init set_tracepoint_printk(char *str)
252 {
253 /* Ignore the "tp_printk_stop_on_boot" param */
254 if (*str == '_')
255 return 0;
256
257 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
258 tracepoint_printk = 1;
259 return 1;
260 }
261 __setup("tp_printk", set_tracepoint_printk);
262
263 unsigned long long ns2usecs(u64 nsec)
264 {
265 nsec += 500;
266 do_div(nsec, 1000);
267 return nsec;
268 }
269
270 static void
271 trace_process_export(struct trace_export *export,
272 struct ring_buffer_event *event, int flag)
273 {
274 struct trace_entry *entry;
275 unsigned int size = 0;
276
277 if (export->flags & flag) {
278 entry = ring_buffer_event_data(event);
279 size = ring_buffer_event_length(event);
280 export->write(export, entry, size);
281 }
282 }
283
284 static DEFINE_MUTEX(ftrace_export_lock);
285
286 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
287
288 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
289 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
290 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
291
292 static inline void ftrace_exports_enable(struct trace_export *export)
293 {
294 if (export->flags & TRACE_EXPORT_FUNCTION)
295 static_branch_inc(&trace_function_exports_enabled);
296
297 if (export->flags & TRACE_EXPORT_EVENT)
298 static_branch_inc(&trace_event_exports_enabled);
299
300 if (export->flags & TRACE_EXPORT_MARKER)
301 static_branch_inc(&trace_marker_exports_enabled);
302 }
303
304 static inline void ftrace_exports_disable(struct trace_export *export)
305 {
306 if (export->flags & TRACE_EXPORT_FUNCTION)
307 static_branch_dec(&trace_function_exports_enabled);
308
309 if (export->flags & TRACE_EXPORT_EVENT)
310 static_branch_dec(&trace_event_exports_enabled);
311
312 if (export->flags & TRACE_EXPORT_MARKER)
313 static_branch_dec(&trace_marker_exports_enabled);
314 }
315
316 static void ftrace_exports(struct ring_buffer_event *event, int flag)
317 {
318 struct trace_export *export;
319
320 preempt_disable_notrace();
321
322 export = rcu_dereference_raw_check(ftrace_exports_list);
323 while (export) {
324 trace_process_export(export, event, flag);
325 export = rcu_dereference_raw_check(export->next);
326 }
327
328 preempt_enable_notrace();
329 }
330
331 static inline void
332 add_trace_export(struct trace_export **list, struct trace_export *export)
333 {
334 rcu_assign_pointer(export->next, *list);
335 /*
336 * We are entering export into the list but another
337 * CPU might be walking that list. We need to make sure
338 * the export->next pointer is valid before another CPU sees
339 * the export pointer included into the list.
340 */
341 rcu_assign_pointer(*list, export);
342 }
343
344 static inline int
345 rm_trace_export(struct trace_export **list, struct trace_export *export)
346 {
347 struct trace_export **p;
348
349 for (p = list; *p != NULL; p = &(*p)->next)
350 if (*p == export)
351 break;
352
353 if (*p != export)
354 return -1;
355
356 rcu_assign_pointer(*p, (*p)->next);
357
358 return 0;
359 }
360
361 static inline void
362 add_ftrace_export(struct trace_export **list, struct trace_export *export)
363 {
364 ftrace_exports_enable(export);
365
366 add_trace_export(list, export);
367 }
368
369 static inline int
370 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
371 {
372 int ret;
373
374 ret = rm_trace_export(list, export);
375 ftrace_exports_disable(export);
376
377 return ret;
378 }
379
380 int register_ftrace_export(struct trace_export *export)
381 {
382 if (WARN_ON_ONCE(!export->write))
383 return -1;
384
385 mutex_lock(&ftrace_export_lock);
386
387 add_ftrace_export(&ftrace_exports_list, export);
388
389 mutex_unlock(&ftrace_export_lock);
390
391 return 0;
392 }
393 EXPORT_SYMBOL_GPL(register_ftrace_export);
394
395 int unregister_ftrace_export(struct trace_export *export)
396 {
397 int ret;
398
399 mutex_lock(&ftrace_export_lock);
400
401 ret = rm_ftrace_export(&ftrace_exports_list, export);
402
403 mutex_unlock(&ftrace_export_lock);
404
405 return ret;
406 }
407 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
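/*
 * Usage sketch (illustrative; my_export_write and my_export are made-up
 * names): an exporter supplies a ->write() callback plus a flag mask and
 * then registers itself.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// forward the raw trace entry to some out-of-band channel
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write = my_export_write,
 *		.flags = TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */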
408
409 /* trace_flags holds trace_options default values */
410 #define TRACE_DEFAULT_FLAGS \
411 (FUNCTION_DEFAULT_FLAGS | \
412 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
413 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
414 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
415 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
416
417 /* trace_options that are only supported by global_trace */
418 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
419 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
420
421 /* trace_flags that are default zero for instances */
422 #define ZEROED_TRACE_FLAGS \
423 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
424
425 /*
426 * The global_trace is the descriptor that holds the top-level tracing
427 * buffers for the live tracing.
428 */
429 static struct trace_array global_trace = {
430 .trace_flags = TRACE_DEFAULT_FLAGS,
431 };
432
433 LIST_HEAD(ftrace_trace_arrays);
434
435 int trace_array_get(struct trace_array *this_tr)
436 {
437 struct trace_array *tr;
438 int ret = -ENODEV;
439
440 mutex_lock(&trace_types_lock);
441 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
442 if (tr == this_tr) {
443 tr->ref++;
444 ret = 0;
445 break;
446 }
447 }
448 mutex_unlock(&trace_types_lock);
449
450 return ret;
451 }
452
453 static void __trace_array_put(struct trace_array *this_tr)
454 {
455 WARN_ON(!this_tr->ref);
456 this_tr->ref--;
457 }
458
459 /**
460 * trace_array_put - Decrement the reference counter for this trace array.
461 *
462 * NOTE: Use this when we no longer need the trace array returned by
463 * trace_array_get_by_name(). This ensures the trace array can be later
464 * destroyed.
465 *
466 */
467 void trace_array_put(struct trace_array *this_tr)
468 {
469 if (!this_tr)
470 return;
471
472 mutex_lock(&trace_types_lock);
473 __trace_array_put(this_tr);
474 mutex_unlock(&trace_types_lock);
475 }
476 EXPORT_SYMBOL_GPL(trace_array_put);
477
478 int tracing_check_open_get_tr(struct trace_array *tr)
479 {
480 int ret;
481
482 ret = security_locked_down(LOCKDOWN_TRACEFS);
483 if (ret)
484 return ret;
485
486 if (tracing_disabled)
487 return -ENODEV;
488
489 if (tr && trace_array_get(tr) < 0)
490 return -ENODEV;
491
492 return 0;
493 }
494
495 int call_filter_check_discard(struct trace_event_call *call, void *rec,
496 struct trace_buffer *buffer,
497 struct ring_buffer_event *event)
498 {
499 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
500 !filter_match_preds(call->filter, rec)) {
501 __trace_event_discard_commit(buffer, event);
502 return 1;
503 }
504
505 return 0;
506 }
507
508 void trace_free_pid_list(struct trace_pid_list *pid_list)
509 {
510 vfree(pid_list->pids);
511 kfree(pid_list);
512 }
513
514 /**
515 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
516 * @filtered_pids: The list of pids to check
517 * @search_pid: The PID to find in @filtered_pids
518 *
519 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
520 */
521 bool
522 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
523 {
524 /*
525 * If pid_max changed after filtered_pids was created, we
526 * by default ignore all pids greater than the previous pid_max.
527 */
528 if (search_pid >= filtered_pids->pid_max)
529 return false;
530
531 return test_bit(search_pid, filtered_pids->pids);
532 }
533
534 /**
535 * trace_ignore_this_task - should a task be ignored for tracing
536 * @filtered_pids: The list of pids to check
537 * @task: The task that should be ignored if not filtered
538 *
539 * Checks if @task should be traced or not from @filtered_pids.
540 * Returns true if @task should *NOT* be traced.
541 * Returns false if @task should be traced.
542 */
543 bool
544 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
545 struct trace_pid_list *filtered_no_pids,
546 struct task_struct *task)
547 {
548 /*
549 * If filtered_no_pids is not empty, and the task's pid is listed
550 * in filtered_no_pids, then return true.
551 * Otherwise, if filtered_pids is empty, that means we can
552 * trace all tasks. If it has content, then only trace pids
553 * within filtered_pids.
554 */
555
556 return (filtered_pids &&
557 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
558 (filtered_no_pids &&
559 trace_find_filtered_pid(filtered_no_pids, task->pid));
560 }
561
562 /**
563 * trace_filter_add_remove_task - Add or remove a task from a pid_list
564 * @pid_list: The list to modify
565 * @self: The current task for fork or NULL for exit
566 * @task: The task to add or remove
567 *
568 * If adding a task, if @self is defined, the task is only added if @self
569 * is also included in @pid_list. This happens on fork and tasks should
570 * only be added when the parent is listed. If @self is NULL, then the
571 * @task pid will be removed from the list, which would happen on exit
572 * of a task.
573 */
574 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
575 struct task_struct *self,
576 struct task_struct *task)
577 {
578 if (!pid_list)
579 return;
580
581 /* For forks, we only add if the forking task is listed */
582 if (self) {
583 if (!trace_find_filtered_pid(pid_list, self->pid))
584 return;
585 }
586
587 /* Sorry, but we don't support pid_max changing after setting */
588 if (task->pid >= pid_list->pid_max)
589 return;
590
591 /* "self" is set for forks, and NULL for exits */
592 if (self)
593 set_bit(task->pid, pid_list->pids);
594 else
595 clear_bit(task->pid, pid_list->pids);
596 }
597
598 /**
599 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
600 * @pid_list: The pid list to show
601 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
602 * @pos: The position of the file
603 *
604 * This is used by the seq_file "next" operation to iterate the pids
605 * listed in a trace_pid_list structure.
606 *
607 * Returns the pid+1 as we want to display pid of zero, but NULL would
608 * stop the iteration.
609 */
610 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
611 {
612 unsigned long pid = (unsigned long)v;
613
614 (*pos)++;
615
616 /* pid already is +1 of the actual previous bit */
617 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
618
619 /* Return pid + 1 to allow zero to be represented */
620 if (pid < pid_list->pid_max)
621 return (void *)(pid + 1);
622
623 return NULL;
624 }
625
626 /**
627 * trace_pid_start - Used for seq_file to start reading pid lists
628 * @pid_list: The pid list to show
629 * @pos: The position of the file
630 *
631 * This is used by seq_file "start" operation to start the iteration
632 * of listing pids.
633 *
634 * Returns the pid+1 as we want to display pid of zero, but NULL would
635 * stop the iteration.
636 */
637 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
638 {
639 unsigned long pid;
640 loff_t l = 0;
641
642 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
643 if (pid >= pid_list->pid_max)
644 return NULL;
645
646 /* Return pid + 1 so that zero can be the exit value */
647 for (pid++; pid && l < *pos;
648 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
649 ;
650 return (void *)pid;
651 }
652
653 /**
654 * trace_pid_show - show the current pid in seq_file processing
655 * @m: The seq_file structure to write into
656 * @v: A void pointer of the pid (+1) value to display
657 *
658 * Can be directly used by seq_file operations to display the current
659 * pid value.
660 */
661 int trace_pid_show(struct seq_file *m, void *v)
662 {
663 unsigned long pid = (unsigned long)v - 1;
664
665 seq_printf(m, "%lu\n", pid);
666 return 0;
667 }
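/*
 * Usage sketch (illustrative, not from the original source): the three
 * helpers above are meant to back a seq_file interface. The wrappers below
 * are hypothetical and only adapt the seq_file prototypes to a particular
 * pid_list:
 *
 *	static void *my_pids_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *my_pids_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations my_pids_seq_ops = {
 *		.start	= my_pids_start,
 *		.next	= my_pids_next,
 *		.stop	= my_pids_stop,		// typically a no-op
 *		.show	= trace_pid_show,
 *	};
 */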
668
669 /* 128 should be much more than enough */
670 #define PID_BUF_SIZE 127
671
672 int trace_pid_write(struct trace_pid_list *filtered_pids,
673 struct trace_pid_list **new_pid_list,
674 const char __user *ubuf, size_t cnt)
675 {
676 struct trace_pid_list *pid_list;
677 struct trace_parser parser;
678 unsigned long val;
679 int nr_pids = 0;
680 ssize_t read = 0;
681 ssize_t ret = 0;
682 loff_t pos;
683 pid_t pid;
684
685 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
686 return -ENOMEM;
687
688 /*
689 * Always recreate a new array. The write is an all or nothing
690 * operation: a new array is always built when the user adds pids,
691 * and if the operation fails, the current list is
692 * not modified.
693 */
694 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
695 if (!pid_list) {
696 trace_parser_put(&parser);
697 return -ENOMEM;
698 }
699
700 pid_list->pid_max = READ_ONCE(pid_max);
701
702 /* Only truncating will shrink pid_max */
703 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
704 pid_list->pid_max = filtered_pids->pid_max;
705
706 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
707 if (!pid_list->pids) {
708 trace_parser_put(&parser);
709 kfree(pid_list);
710 return -ENOMEM;
711 }
712
713 if (filtered_pids) {
714 /* copy the current bits to the new max */
715 for_each_set_bit(pid, filtered_pids->pids,
716 filtered_pids->pid_max) {
717 set_bit(pid, pid_list->pids);
718 nr_pids++;
719 }
720 }
721
722 while (cnt > 0) {
723
724 pos = 0;
725
726 ret = trace_get_user(&parser, ubuf, cnt, &pos);
727 if (ret < 0 || !trace_parser_loaded(&parser))
728 break;
729
730 read += ret;
731 ubuf += ret;
732 cnt -= ret;
733
734 ret = -EINVAL;
735 if (kstrtoul(parser.buffer, 0, &val))
736 break;
737 if (val >= pid_list->pid_max)
738 break;
739
740 pid = (pid_t)val;
741
742 set_bit(pid, pid_list->pids);
743 nr_pids++;
744
745 trace_parser_clear(&parser);
746 ret = 0;
747 }
748 trace_parser_put(&parser);
749
750 if (ret < 0) {
751 trace_free_pid_list(pid_list);
752 return ret;
753 }
754
755 if (!nr_pids) {
756 /* Cleared the list of pids */
757 trace_free_pid_list(pid_list);
758 read = ret;
759 pid_list = NULL;
760 }
761
762 *new_pid_list = pid_list;
763
764 return read;
765 }
766
767 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
768 {
769 u64 ts;
770
771 /* Early boot up does not have a buffer yet */
772 if (!buf->buffer)
773 return trace_clock_local();
774
775 ts = ring_buffer_time_stamp(buf->buffer, cpu);
776 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
777
778 return ts;
779 }
780
781 u64 ftrace_now(int cpu)
782 {
783 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
784 }
785
786 /**
787 * tracing_is_enabled - Show if global_trace has been disabled
788 *
789 * Shows if the global trace has been enabled or not. It uses the
790 * mirror flag "buffer_disabled", which is meant for fast paths such as
791 * the irqsoff tracer. But it may be inaccurate due to races. If you
792 * need to know the accurate state, use tracing_is_on() which is a little
793 * slower, but accurate.
794 */
795 int tracing_is_enabled(void)
796 {
797 /*
798 * For quick access (irqsoff uses this in fast path), just
799 * return the mirror variable of the state of the ring buffer.
800 * It's a little racy, but we don't really care.
801 */
802 smp_rmb();
803 return !global_trace.buffer_disabled;
804 }
805
806 /*
807 * trace_buf_size is the size in bytes that is allocated
808 * for a buffer. Note, the number of bytes is always rounded
809 * to page size.
810 *
811 * This number is purposely set to a low number of 16384.
812 * If the dump on oops happens, it will be much appreciated
813 * to not have to wait for all that output. In any case, this is
814 * configurable at both boot time and run time.
815 */
816 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
817
818 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
819
820 /* trace_types holds a link list of available tracers. */
821 static struct tracer *trace_types __read_mostly;
822
823 /*
824 * trace_types_lock is used to protect the trace_types list.
825 */
826 DEFINE_MUTEX(trace_types_lock);
827
828 /*
829 * Serialize access to the ring buffer
830 *
831 * The ring buffer serializes readers, but that is only low-level
832 * protection. The validity of the events (returned by ring_buffer_peek()
833 * and friends) is not protected by the ring buffer.
834 *
835 * The content of events may become garbage if we allow other processes
836 * to consume these events concurrently:
837 * A) the page of the consumed events may become a normal page
838 * (not a reader page) in the ring buffer, and this page will be rewritten
839 * by the events producer.
840 * B) the page of the consumed events may become a page for splice_read,
841 * and this page will be returned to the system.
842 *
843 * These primitives allow multiple processes to access different cpu
844 * ring buffers concurrently.
845 *
846 * These primitives don't distinguish read-only and read-consume access.
847 * Multiple read-only accesses are also serialized.
848 */
849
850 #ifdef CONFIG_SMP
851 static DECLARE_RWSEM(all_cpu_access_lock);
852 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
853
854 static inline void trace_access_lock(int cpu)
855 {
856 if (cpu == RING_BUFFER_ALL_CPUS) {
857 /* gain it for accessing the whole ring buffer. */
858 down_write(&all_cpu_access_lock);
859 } else {
860 /* gain it for accessing a cpu ring buffer. */
861
862 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
863 down_read(&all_cpu_access_lock);
864
865 /* Secondly block other access to this @cpu ring buffer. */
866 mutex_lock(&per_cpu(cpu_access_lock, cpu));
867 }
868 }
869
870 static inline void trace_access_unlock(int cpu)
871 {
872 if (cpu == RING_BUFFER_ALL_CPUS) {
873 up_write(&all_cpu_access_lock);
874 } else {
875 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
876 up_read(&all_cpu_access_lock);
877 }
878 }
879
880 static inline void trace_access_lock_init(void)
881 {
882 int cpu;
883
884 for_each_possible_cpu(cpu)
885 mutex_init(&per_cpu(cpu_access_lock, cpu));
886 }
887
888 #else
889
890 static DEFINE_MUTEX(access_lock);
891
892 static inline void trace_access_lock(int cpu)
893 {
894 (void)cpu;
895 mutex_lock(&access_lock);
896 }
897
898 static inline void trace_access_unlock(int cpu)
899 {
900 (void)cpu;
901 mutex_unlock(&access_lock);
902 }
903
904 static inline void trace_access_lock_init(void)
905 {
906 }
907
908 #endif
909
910 #ifdef CONFIG_STACKTRACE
911 static void __ftrace_trace_stack(struct trace_buffer *buffer,
912 unsigned long flags,
913 int skip, int pc, struct pt_regs *regs);
914 static inline void ftrace_trace_stack(struct trace_array *tr,
915 struct trace_buffer *buffer,
916 unsigned long flags,
917 int skip, int pc, struct pt_regs *regs);
918
919 #else
920 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
921 unsigned long flags,
922 int skip, int pc, struct pt_regs *regs)
923 {
924 }
925 static inline void ftrace_trace_stack(struct trace_array *tr,
926 struct trace_buffer *buffer,
927 unsigned long flags,
928 int skip, int pc, struct pt_regs *regs)
929 {
930 }
931
932 #endif
933
934 static __always_inline void
935 trace_event_setup(struct ring_buffer_event *event,
936 int type, unsigned long flags, int pc)
937 {
938 struct trace_entry *ent = ring_buffer_event_data(event);
939
940 tracing_generic_entry_update(ent, type, flags, pc);
941 }
942
943 static __always_inline struct ring_buffer_event *
944 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
945 int type,
946 unsigned long len,
947 unsigned long flags, int pc)
948 {
949 struct ring_buffer_event *event;
950
951 event = ring_buffer_lock_reserve(buffer, len);
952 if (event != NULL)
953 trace_event_setup(event, type, flags, pc);
954
955 return event;
956 }
957
958 void tracer_tracing_on(struct trace_array *tr)
959 {
960 if (tr->array_buffer.buffer)
961 ring_buffer_record_on(tr->array_buffer.buffer);
962 /*
963 * This flag is looked at when buffers haven't been allocated
964 * yet, or by some tracers (like irqsoff), that just want to
965 * know if the ring buffer has been disabled, but it can handle
966 * races where it gets disabled but we still do a record.
967 * As the check is in the fast path of the tracers, it is more
968 * important to be fast than accurate.
969 */
970 tr->buffer_disabled = 0;
971 /* Make the flag seen by readers */
972 smp_wmb();
973 }
974
975 /**
976 * tracing_on - enable tracing buffers
977 *
978 * This function enables tracing buffers that may have been
979 * disabled with tracing_off.
980 */
981 void tracing_on(void)
982 {
983 tracer_tracing_on(&global_trace);
984 }
985 EXPORT_SYMBOL_GPL(tracing_on);
986
987
988 static __always_inline void
989 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
990 {
991 __this_cpu_write(trace_taskinfo_save, true);
992
993 /* If this is the temp buffer, we need to commit fully */
994 if (this_cpu_read(trace_buffered_event) == event) {
995 /* Length is in event->array[0] */
996 ring_buffer_write(buffer, event->array[0], &event->array[1]);
997 /* Release the temp buffer */
998 this_cpu_dec(trace_buffered_event_cnt);
999 } else
1000 ring_buffer_unlock_commit(buffer, event);
1001 }
1002
1003 /**
1004 * __trace_puts - write a constant string into the trace buffer.
1005 * @ip: The address of the caller
1006 * @str: The constant string to write
1007 * @size: The size of the string.
1008 */
1009 int __trace_puts(unsigned long ip, const char *str, int size)
1010 {
1011 struct ring_buffer_event *event;
1012 struct trace_buffer *buffer;
1013 struct print_entry *entry;
1014 unsigned long irq_flags;
1015 int alloc;
1016 int pc;
1017
1018 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1019 return 0;
1020
1021 pc = preempt_count();
1022
1023 if (unlikely(tracing_selftest_running || tracing_disabled))
1024 return 0;
1025
1026 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1027
1028 local_save_flags(irq_flags);
1029 buffer = global_trace.array_buffer.buffer;
1030 ring_buffer_nest_start(buffer);
1031 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1032 irq_flags, pc);
1033 if (!event) {
1034 size = 0;
1035 goto out;
1036 }
1037
1038 entry = ring_buffer_event_data(event);
1039 entry->ip = ip;
1040
1041 memcpy(&entry->buf, str, size);
1042
1043 /* Add a newline if necessary */
1044 if (entry->buf[size - 1] != '\n') {
1045 entry->buf[size] = '\n';
1046 entry->buf[size + 1] = '\0';
1047 } else
1048 entry->buf[size] = '\0';
1049
1050 __buffer_unlock_commit(buffer, event);
1051 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1052 out:
1053 ring_buffer_nest_end(buffer);
1054 return size;
1055 }
1056 EXPORT_SYMBOL_GPL(__trace_puts);
1057
1058 /**
1059 * __trace_bputs - write the pointer to a constant string into trace buffer
1060 * @ip: The address of the caller
1061 * @str: The constant string to write to the buffer to
1062 */
1063 int __trace_bputs(unsigned long ip, const char *str)
1064 {
1065 struct ring_buffer_event *event;
1066 struct trace_buffer *buffer;
1067 struct bputs_entry *entry;
1068 unsigned long irq_flags;
1069 int size = sizeof(struct bputs_entry);
1070 int ret = 0;
1071 int pc;
1072
1073 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1074 return 0;
1075
1076 pc = preempt_count();
1077
1078 if (unlikely(tracing_selftest_running || tracing_disabled))
1079 return 0;
1080
1081 local_save_flags(irq_flags);
1082 buffer = global_trace.array_buffer.buffer;
1083
1084 ring_buffer_nest_start(buffer);
1085 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1086 irq_flags, pc);
1087 if (!event)
1088 goto out;
1089
1090 entry = ring_buffer_event_data(event);
1091 entry->ip = ip;
1092 entry->str = str;
1093
1094 __buffer_unlock_commit(buffer, event);
1095 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1096
1097 ret = 1;
1098 out:
1099 ring_buffer_nest_end(buffer);
1100 return ret;
1101 }
1102 EXPORT_SYMBOL_GPL(__trace_bputs);
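/*
 * Note (illustrative): callers normally reach the two helpers above through
 * the trace_puts() macro, which picks __trace_bputs() for build-time
 * constant strings and falls back to __trace_puts() otherwise, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 */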
1103
1104 #ifdef CONFIG_TRACER_SNAPSHOT
1105 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1106 void *cond_data)
1107 {
1108 struct tracer *tracer = tr->current_trace;
1109 unsigned long flags;
1110
1111 if (in_nmi()) {
1112 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1113 internal_trace_puts("*** snapshot is being ignored ***\n");
1114 return;
1115 }
1116
1117 if (!tr->allocated_snapshot) {
1118 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1119 internal_trace_puts("*** stopping trace here! ***\n");
1120 tracing_off();
1121 return;
1122 }
1123
1124 /* Note, snapshot can not be used when the tracer uses it */
1125 if (tracer->use_max_tr) {
1126 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1127 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1128 return;
1129 }
1130
1131 local_irq_save(flags);
1132 update_max_tr(tr, current, smp_processor_id(), cond_data);
1133 local_irq_restore(flags);
1134 }
1135
1136 void tracing_snapshot_instance(struct trace_array *tr)
1137 {
1138 tracing_snapshot_instance_cond(tr, NULL);
1139 }
1140
1141 /**
1142 * tracing_snapshot - take a snapshot of the current buffer.
1143 *
1144 * This causes a swap between the snapshot buffer and the current live
1145 * tracing buffer. You can use this to take snapshots of the live
1146 * trace when some condition is triggered, but continue to trace.
1147 *
1148 * Note, make sure to allocate the snapshot with either
1149 * a tracing_snapshot_alloc(), or by doing it manually
1150 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1151 *
1152 * If the snapshot buffer is not allocated, it will stop tracing.
1153 * Basically making a permanent snapshot.
1154 */
1155 void tracing_snapshot(void)
1156 {
1157 struct trace_array *tr = &global_trace;
1158
1159 tracing_snapshot_instance(tr);
1160 }
1161 EXPORT_SYMBOL_GPL(tracing_snapshot);
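/*
 * Example (illustrative, not part of the original source): from sleepable
 * context the spare buffer is usually set up once, after which snapshots
 * can be taken wherever tracing is active:
 *
 *	tracing_alloc_snapshot();	// or: echo 1 > /sys/kernel/debug/tracing/snapshot
 *	...
 *	tracing_snapshot();		// swap live buffer and snapshot buffer
 */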
1162
1163 /**
1164 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1165 * @tr: The tracing instance to snapshot
1166 * @cond_data: The data to be tested conditionally, and possibly saved
1167 *
1168 * This is the same as tracing_snapshot() except that the snapshot is
1169 * conditional - the snapshot will only happen if the
1170 * cond_snapshot.update() implementation receiving the cond_data
1171 * returns true, which means that the trace array's cond_snapshot
1172 * update() operation used the cond_data to determine whether the
1173 * snapshot should be taken, and if it was, presumably saved it along
1174 * with the snapshot.
1175 */
1176 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1177 {
1178 tracing_snapshot_instance_cond(tr, cond_data);
1179 }
1180 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1181
1182 /**
1183 * tracing_snapshot_cond_data - get the user data associated with a snapshot
1184 * @tr: The tracing instance
1185 *
1186 * When the user enables a conditional snapshot using
1187 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1188 * with the snapshot. This accessor is used to retrieve it.
1189 *
1190 * Should not be called from cond_snapshot.update(), since it takes
1191 * the tr->max_lock lock, which the code calling
1192 * cond_snapshot.update() has already taken.
1193 *
1194 * Returns the cond_data associated with the trace array's snapshot.
1195 */
1196 void *tracing_cond_snapshot_data(struct trace_array *tr)
1197 {
1198 void *cond_data = NULL;
1199
1200 local_irq_disable();
1201 arch_spin_lock(&tr->max_lock);
1202
1203 if (tr->cond_snapshot)
1204 cond_data = tr->cond_snapshot->cond_data;
1205
1206 arch_spin_unlock(&tr->max_lock);
1207 local_irq_enable();
1208
1209 return cond_data;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1212
1213 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1214 struct array_buffer *size_buf, int cpu_id);
1215 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1216
1217 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1218 {
1219 int ret;
1220
1221 if (!tr->allocated_snapshot) {
1222
1223 /* allocate spare buffer */
1224 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1225 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1226 if (ret < 0)
1227 return ret;
1228
1229 tr->allocated_snapshot = true;
1230 }
1231
1232 return 0;
1233 }
1234
1235 static void free_snapshot(struct trace_array *tr)
1236 {
1237 /*
1238 * We don't free the ring buffer; instead, we resize it because
1239 * the max_tr ring buffer has some state (e.g. ring->clock) and
1240 * we want to preserve it.
1241 */
1242 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1243 set_buffer_entries(&tr->max_buffer, 1);
1244 tracing_reset_online_cpus(&tr->max_buffer);
1245 tr->allocated_snapshot = false;
1246 }
1247
1248 /**
1249 * tracing_alloc_snapshot - allocate snapshot buffer.
1250 *
1251 * This only allocates the snapshot buffer if it isn't already
1252 * allocated - it doesn't also take a snapshot.
1253 *
1254 * This is meant to be used in cases where the snapshot buffer needs
1255 * to be set up for events that can't sleep but need to be able to
1256 * trigger a snapshot.
1257 */
1258 int tracing_alloc_snapshot(void)
1259 {
1260 struct trace_array *tr = &global_trace;
1261 int ret;
1262
1263 ret = tracing_alloc_snapshot_instance(tr);
1264 WARN_ON(ret < 0);
1265
1266 return ret;
1267 }
1268 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1269
1270 /**
1271 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1272 *
1273 * This is similar to tracing_snapshot(), but it will allocate the
1274 * snapshot buffer if it isn't already allocated. Use this only
1275 * where it is safe to sleep, as the allocation may sleep.
1276 *
1277 * This causes a swap between the snapshot buffer and the current live
1278 * tracing buffer. You can use this to take snapshots of the live
1279 * trace when some condition is triggered, but continue to trace.
1280 */
1281 void tracing_snapshot_alloc(void)
1282 {
1283 int ret;
1284
1285 ret = tracing_alloc_snapshot();
1286 if (ret < 0)
1287 return;
1288
1289 tracing_snapshot();
1290 }
1291 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1292
1293 /**
1294 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1295 * @tr: The tracing instance
1296 * @cond_data: User data to associate with the snapshot
1297 * @update: Implementation of the cond_snapshot update function
1298 *
1299 * Check whether the conditional snapshot for the given instance has
1300 * already been enabled, or if the current tracer is already using a
1301 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1302 * save the cond_data and update function inside.
1303 *
1304 * Returns 0 if successful, error otherwise.
1305 */
1306 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1307 cond_update_fn_t update)
1308 {
1309 struct cond_snapshot *cond_snapshot;
1310 int ret = 0;
1311
1312 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1313 if (!cond_snapshot)
1314 return -ENOMEM;
1315
1316 cond_snapshot->cond_data = cond_data;
1317 cond_snapshot->update = update;
1318
1319 mutex_lock(&trace_types_lock);
1320
1321 ret = tracing_alloc_snapshot_instance(tr);
1322 if (ret)
1323 goto fail_unlock;
1324
1325 if (tr->current_trace->use_max_tr) {
1326 ret = -EBUSY;
1327 goto fail_unlock;
1328 }
1329
1330 /*
1331 * The cond_snapshot can only change to NULL without the
1332 * trace_types_lock. We don't care if we race with it going
1333 * to NULL, but we want to make sure that it's not set to
1334 * something other than NULL when we get here, which we can
1335 * do safely with only holding the trace_types_lock and not
1336 * having to take the max_lock.
1337 */
1338 if (tr->cond_snapshot) {
1339 ret = -EBUSY;
1340 goto fail_unlock;
1341 }
1342
1343 local_irq_disable();
1344 arch_spin_lock(&tr->max_lock);
1345 tr->cond_snapshot = cond_snapshot;
1346 arch_spin_unlock(&tr->max_lock);
1347 local_irq_enable();
1348
1349 mutex_unlock(&trace_types_lock);
1350
1351 return ret;
1352
1353 fail_unlock:
1354 mutex_unlock(&trace_types_lock);
1355 kfree(cond_snapshot);
1356 return ret;
1357 }
1358 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
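/*
 * Usage sketch (illustrative; my_update and my_data are made-up names):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		// return true only when this snapshot should be taken
 *		return cond_data != NULL;
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, my_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, my_data);	// snapshots only if my_update() returns true
 *	tracing_snapshot_cond_disable(tr);
 */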
1359
1360 /**
1361 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1362 * @tr: The tracing instance
1363 *
1364 * Check whether the conditional snapshot for the given instance is
1365 * enabled; if so, free the cond_snapshot associated with it,
1366 * otherwise return -EINVAL.
1367 *
1368 * Returns 0 if successful, error otherwise.
1369 */
1370 int tracing_snapshot_cond_disable(struct trace_array *tr)
1371 {
1372 int ret = 0;
1373
1374 local_irq_disable();
1375 arch_spin_lock(&tr->max_lock);
1376
1377 if (!tr->cond_snapshot)
1378 ret = -EINVAL;
1379 else {
1380 kfree(tr->cond_snapshot);
1381 tr->cond_snapshot = NULL;
1382 }
1383
1384 arch_spin_unlock(&tr->max_lock);
1385 local_irq_enable();
1386
1387 return ret;
1388 }
1389 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1390 #else
1391 void tracing_snapshot(void)
1392 {
1393 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1394 }
1395 EXPORT_SYMBOL_GPL(tracing_snapshot);
1396 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1397 {
1398 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1399 }
1400 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1401 int tracing_alloc_snapshot(void)
1402 {
1403 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1404 return -ENODEV;
1405 }
1406 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1407 void tracing_snapshot_alloc(void)
1408 {
1409 /* Give warning */
1410 tracing_snapshot();
1411 }
1412 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1413 void *tracing_cond_snapshot_data(struct trace_array *tr)
1414 {
1415 return NULL;
1416 }
1417 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1418 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1419 {
1420 return -ENODEV;
1421 }
1422 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1423 int tracing_snapshot_cond_disable(struct trace_array *tr)
1424 {
1425 return false;
1426 }
1427 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1428 #endif /* CONFIG_TRACER_SNAPSHOT */
1429
1430 void tracer_tracing_off(struct trace_array *tr)
1431 {
1432 if (tr->array_buffer.buffer)
1433 ring_buffer_record_off(tr->array_buffer.buffer);
1434 /*
1435 * This flag is looked at when buffers haven't been allocated
1436 * yet, or by some tracers (like irqsoff), that just want to
1437 * know if the ring buffer has been disabled, but it can handle
1438 * races where it gets disabled but we still do a record.
1439 * As the check is in the fast path of the tracers, it is more
1440 * important to be fast than accurate.
1441 */
1442 tr->buffer_disabled = 1;
1443 /* Make the flag seen by readers */
1444 smp_wmb();
1445 }
1446
1447 /**
1448 * tracing_off - turn off tracing buffers
1449 *
1450 * This function stops the tracing buffers from recording data.
1451 * It does not disable any overhead the tracers themselves may
1452 * be causing. This function simply causes all recording to
1453 * the ring buffers to fail.
1454 */
1455 void tracing_off(void)
1456 {
1457 tracer_tracing_off(&global_trace);
1458 }
1459 EXPORT_SYMBOL_GPL(tracing_off);
1460
1461 void disable_trace_on_warning(void)
1462 {
1463 if (__disable_trace_on_warning) {
1464 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1465 "Disabling tracing due to warning\n");
1466 tracing_off();
1467 }
1468 }
1469
1470 /**
1471 * tracer_tracing_is_on - show real state of ring buffer enabled
1472 * @tr : the trace array to know if ring buffer is enabled
1473 *
1474 * Shows real state of the ring buffer if it is enabled or not.
1475 */
1476 bool tracer_tracing_is_on(struct trace_array *tr)
1477 {
1478 if (tr->array_buffer.buffer)
1479 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1480 return !tr->buffer_disabled;
1481 }
1482
1483 /**
1484 * tracing_is_on - show state of ring buffers enabled
1485 */
1486 int tracing_is_on(void)
1487 {
1488 return tracer_tracing_is_on(&global_trace);
1489 }
1490 EXPORT_SYMBOL_GPL(tracing_is_on);
1491
1492 static int __init set_buf_size(char *str)
1493 {
1494 unsigned long buf_size;
1495
1496 if (!str)
1497 return 0;
1498 buf_size = memparse(str, &str);
1499 /*
1500 * nr_entries can not be zero and the startup
1501 * tests require some buffer space. Therefore
1502 * ensure we have at least 4096 bytes of buffer.
1503 */
1504 trace_buf_size = max(4096UL, buf_size);
1505 return 1;
1506 }
1507 __setup("trace_buf_size=", set_buf_size);
1508
1509 static int __init set_tracing_thresh(char *str)
1510 {
1511 unsigned long threshold;
1512 int ret;
1513
1514 if (!str)
1515 return 0;
1516 ret = kstrtoul(str, 0, &threshold);
1517 if (ret < 0)
1518 return 0;
1519 tracing_thresh = threshold * 1000;
1520 return 1;
1521 }
1522 __setup("tracing_thresh=", set_tracing_thresh);
1523
1524 unsigned long nsecs_to_usecs(unsigned long nsecs)
1525 {
1526 return nsecs / 1000;
1527 }
1528
1529 /*
1530 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1531 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1532 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1533 * of strings in the order that the evals (enum) were defined.
1534 */
1535 #undef C
1536 #define C(a, b) b
1537
1538 /* These must match the bit positions in trace_iterator_flags */
1539 static const char *trace_options[] = {
1540 TRACE_FLAGS
1541 NULL
1542 };
1543
1544 static struct {
1545 u64 (*func)(void);
1546 const char *name;
1547 int in_ns; /* is this clock in nanoseconds? */
1548 } trace_clocks[] = {
1549 { trace_clock_local, "local", 1 },
1550 { trace_clock_global, "global", 1 },
1551 { trace_clock_counter, "counter", 0 },
1552 { trace_clock_jiffies, "uptime", 0 },
1553 { trace_clock, "perf", 1 },
1554 { ktime_get_mono_fast_ns, "mono", 1 },
1555 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1556 { ktime_get_boot_fast_ns, "boot", 1 },
1557 ARCH_TRACE_CLOCKS
1558 };
1559
1560 bool trace_clock_in_ns(struct trace_array *tr)
1561 {
1562 if (trace_clocks[tr->clock_id].in_ns)
1563 return true;
1564
1565 return false;
1566 }
1567
1568 /*
1569 * trace_parser_get_init - gets the buffer for trace parser
1570 */
1571 int trace_parser_get_init(struct trace_parser *parser, int size)
1572 {
1573 memset(parser, 0, sizeof(*parser));
1574
1575 parser->buffer = kmalloc(size, GFP_KERNEL);
1576 if (!parser->buffer)
1577 return 1;
1578
1579 parser->size = size;
1580 return 0;
1581 }
1582
1583 /*
1584 * trace_parser_put - frees the buffer for trace parser
1585 */
1586 void trace_parser_put(struct trace_parser *parser)
1587 {
1588 kfree(parser->buffer);
1589 parser->buffer = NULL;
1590 }
1591
1592 /*
1593 * trace_get_user - reads the user input string separated by space
1594 * (matched by isspace(ch))
1595 *
1596 * For each string found the 'struct trace_parser' is updated,
1597 * and the function returns.
1598 *
1599 * Returns number of bytes read.
1600 *
1601 * See kernel/trace/trace.h for 'struct trace_parser' details.
1602 */
1603 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1604 size_t cnt, loff_t *ppos)
1605 {
1606 char ch;
1607 size_t read = 0;
1608 ssize_t ret;
1609
1610 if (!*ppos)
1611 trace_parser_clear(parser);
1612
1613 ret = get_user(ch, ubuf++);
1614 if (ret)
1615 goto out;
1616
1617 read++;
1618 cnt--;
1619
1620 /*
1621 * The parser is not finished with the last write,
1622 * continue reading the user input without skipping spaces.
1623 */
1624 if (!parser->cont) {
1625 /* skip white space */
1626 while (cnt && isspace(ch)) {
1627 ret = get_user(ch, ubuf++);
1628 if (ret)
1629 goto out;
1630 read++;
1631 cnt--;
1632 }
1633
1634 parser->idx = 0;
1635
1636 /* only spaces were written */
1637 if (isspace(ch) || !ch) {
1638 *ppos += read;
1639 ret = read;
1640 goto out;
1641 }
1642 }
1643
1644 /* read the non-space input */
1645 while (cnt && !isspace(ch) && ch) {
1646 if (parser->idx < parser->size - 1)
1647 parser->buffer[parser->idx++] = ch;
1648 else {
1649 ret = -EINVAL;
1650 goto out;
1651 }
1652 ret = get_user(ch, ubuf++);
1653 if (ret)
1654 goto out;
1655 read++;
1656 cnt--;
1657 }
1658
1659 /* We either got finished input or we have to wait for another call. */
1660 if (isspace(ch) || !ch) {
1661 parser->buffer[parser->idx] = 0;
1662 parser->cont = false;
1663 } else if (parser->idx < parser->size - 1) {
1664 parser->cont = true;
1665 parser->buffer[parser->idx++] = ch;
1666 /* Make sure the parsed string always terminates with '\0'. */
1667 parser->buffer[parser->idx] = 0;
1668 } else {
1669 ret = -EINVAL;
1670 goto out;
1671 }
1672
1673 *ppos += read;
1674 ret = read;
1675
1676 out:
1677 return ret;
1678 }
1679
1680 /* TODO add a seq_buf_to_buffer() */
1681 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1682 {
1683 int len;
1684
1685 if (trace_seq_used(s) <= s->seq.readpos)
1686 return -EBUSY;
1687
1688 len = trace_seq_used(s) - s->seq.readpos;
1689 if (cnt > len)
1690 cnt = len;
1691 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1692
1693 s->seq.readpos += cnt;
1694 return cnt;
1695 }
1696
1697 unsigned long __read_mostly tracing_thresh;
1698 static const struct file_operations tracing_max_lat_fops;
1699
1700 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1701 defined(CONFIG_FSNOTIFY)
1702
1703 static struct workqueue_struct *fsnotify_wq;
1704
1705 static void latency_fsnotify_workfn(struct work_struct *work)
1706 {
1707 struct trace_array *tr = container_of(work, struct trace_array,
1708 fsnotify_work);
1709 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1710 }
1711
1712 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1713 {
1714 struct trace_array *tr = container_of(iwork, struct trace_array,
1715 fsnotify_irqwork);
1716 queue_work(fsnotify_wq, &tr->fsnotify_work);
1717 }
1718
1719 static void trace_create_maxlat_file(struct trace_array *tr,
1720 struct dentry *d_tracer)
1721 {
1722 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1723 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1724 tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1725 d_tracer, &tr->max_latency,
1726 &tracing_max_lat_fops);
1727 }
1728
1729 __init static int latency_fsnotify_init(void)
1730 {
1731 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1732 WQ_UNBOUND | WQ_HIGHPRI, 0);
1733 if (!fsnotify_wq) {
1734 pr_err("Unable to allocate tr_max_lat_wq\n");
1735 return -ENOMEM;
1736 }
1737 return 0;
1738 }
1739
1740 late_initcall_sync(latency_fsnotify_init);
1741
1742 void latency_fsnotify(struct trace_array *tr)
1743 {
1744 if (!fsnotify_wq)
1745 return;
1746 /*
1747 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1748 * possible that we are called from __schedule() or do_idle(), which
1749 * could cause a deadlock.
1750 */
1751 irq_work_queue(&tr->fsnotify_irqwork);
1752 }
1753
1754 /*
1755 * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1756 * defined(CONFIG_FSNOTIFY)
1757 */
1758 #else
1759
1760 #define trace_create_maxlat_file(tr, d_tracer) \
1761 trace_create_file("tracing_max_latency", 0644, d_tracer, \
1762 &tr->max_latency, &tracing_max_lat_fops)
1763
1764 #endif
1765
1766 #ifdef CONFIG_TRACER_MAX_TRACE
1767 /*
1768 * Copy the new maximum trace into the separate maximum-trace
1769 * structure. (this way the maximum trace is permanently saved,
1770 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1771 */
1772 static void
1773 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1774 {
1775 struct array_buffer *trace_buf = &tr->array_buffer;
1776 struct array_buffer *max_buf = &tr->max_buffer;
1777 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1778 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1779
1780 max_buf->cpu = cpu;
1781 max_buf->time_start = data->preempt_timestamp;
1782
1783 max_data->saved_latency = tr->max_latency;
1784 max_data->critical_start = data->critical_start;
1785 max_data->critical_end = data->critical_end;
1786
1787 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1788 max_data->pid = tsk->pid;
1789 /*
1790 * If tsk == current, then use current_uid(), as that does not use
1791 * RCU. The irq tracer can be called out of RCU scope.
1792 */
1793 if (tsk == current)
1794 max_data->uid = current_uid();
1795 else
1796 max_data->uid = task_uid(tsk);
1797
1798 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1799 max_data->policy = tsk->policy;
1800 max_data->rt_priority = tsk->rt_priority;
1801
1802 /* record this task's comm */
1803 tracing_record_cmdline(tsk);
1804 latency_fsnotify(tr);
1805 }
1806
1807 /**
1808 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1809 * @tr: tracer
1810 * @tsk: the task with the latency
1811 * @cpu: The cpu that initiated the trace.
1812 * @cond_data: User data associated with a conditional snapshot
1813 *
1814 * Flip the buffers between the @tr and the max_tr and record information
1815 * about which task was the cause of this latency.
1816 */
1817 void
1818 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1819 void *cond_data)
1820 {
1821 if (tr->stop_count)
1822 return;
1823
1824 WARN_ON_ONCE(!irqs_disabled());
1825
1826 if (!tr->allocated_snapshot) {
1827 /* Only the nop tracer should hit this when disabling */
1828 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1829 return;
1830 }
1831
1832 arch_spin_lock(&tr->max_lock);
1833
1834 /* Inherit the recordable setting from array_buffer */
1835 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1836 ring_buffer_record_on(tr->max_buffer.buffer);
1837 else
1838 ring_buffer_record_off(tr->max_buffer.buffer);
1839
1840 #ifdef CONFIG_TRACER_SNAPSHOT
1841 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1842 goto out_unlock;
1843 #endif
1844 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1845
1846 __update_max_tr(tr, tsk, cpu);
1847
1848 out_unlock:
1849 arch_spin_unlock(&tr->max_lock);
1850 }
1851
1852 /**
1853 * update_max_tr_single - only copy one trace over, and reset the rest
1854 * @tr: tracer
1855 * @tsk: task with the latency
1856 * @cpu: the cpu of the buffer to copy.
1857 *
1858 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1859 */
1860 void
1861 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1862 {
1863 int ret;
1864
1865 if (tr->stop_count)
1866 return;
1867
1868 WARN_ON_ONCE(!irqs_disabled());
1869 if (!tr->allocated_snapshot) {
1870 /* Only the nop tracer should hit this when disabling */
1871 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1872 return;
1873 }
1874
1875 arch_spin_lock(&tr->max_lock);
1876
1877 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1878
1879 if (ret == -EBUSY) {
1880 /*
1881 * We failed to swap the buffer due to a commit taking
1882 * place on this CPU. We fail to record, but we reset
1883 * the max trace buffer (no one writes directly to it)
1884 * and flag that it failed.
1885 */
1886 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1887 "Failed to swap buffers due to commit in progress\n");
1888 }
1889
1890 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1891
1892 __update_max_tr(tr, tsk, cpu);
1893 arch_spin_unlock(&tr->max_lock);
1894 }
1895 #endif /* CONFIG_TRACER_MAX_TRACE */
1896
1897 static int wait_on_pipe(struct trace_iterator *iter, int full)
1898 {
1899 /* Iterators are static, they should be filled or empty */
1900 if (trace_buffer_iter(iter, iter->cpu_file))
1901 return 0;
1902
1903 return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1904 full);
1905 }
1906
1907 #ifdef CONFIG_FTRACE_STARTUP_TEST
1908 static bool selftests_can_run;
1909
1910 struct trace_selftests {
1911 struct list_head list;
1912 struct tracer *type;
1913 };
1914
1915 static LIST_HEAD(postponed_selftests);
1916
1917 static int save_selftest(struct tracer *type)
1918 {
1919 struct trace_selftests *selftest;
1920
1921 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1922 if (!selftest)
1923 return -ENOMEM;
1924
1925 selftest->type = type;
1926 list_add(&selftest->list, &postponed_selftests);
1927 return 0;
1928 }
1929
1930 static int run_tracer_selftest(struct tracer *type)
1931 {
1932 struct trace_array *tr = &global_trace;
1933 struct tracer *saved_tracer = tr->current_trace;
1934 int ret;
1935
1936 if (!type->selftest || tracing_selftest_disabled)
1937 return 0;
1938
1939 /*
1940 * If a tracer registers early in boot up (before scheduling is
1941 * initialized and such), then do not run its selftests yet.
1942 * Instead, run it a little later in the boot process.
1943 */
1944 if (!selftests_can_run)
1945 return save_selftest(type);
1946
1947 /*
1948 * Run a selftest on this tracer.
1949 * Here we reset the trace buffer, and set the current
1950 * tracer to be this tracer. The tracer can then run some
1951 * internal tracing to verify that everything is in order.
1952 * If we fail, we do not register this tracer.
1953 */
1954 tracing_reset_online_cpus(&tr->array_buffer);
1955
1956 tr->current_trace = type;
1957
1958 #ifdef CONFIG_TRACER_MAX_TRACE
1959 if (type->use_max_tr) {
1960 /* If we expanded the buffers, make sure the max is expanded too */
1961 if (ring_buffer_expanded)
1962 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1963 RING_BUFFER_ALL_CPUS);
1964 tr->allocated_snapshot = true;
1965 }
1966 #endif
1967
1968 /* the test is responsible for initializing and enabling */
1969 pr_info("Testing tracer %s: ", type->name);
1970 ret = type->selftest(type, tr);
1971 /* the test is responsible for resetting too */
1972 tr->current_trace = saved_tracer;
1973 if (ret) {
1974 printk(KERN_CONT "FAILED!\n");
1975 /* Add the warning after printing 'FAILED' */
1976 WARN_ON(1);
1977 return -1;
1978 }
1979 /* Only reset on passing, to avoid touching corrupted buffers */
1980 tracing_reset_online_cpus(&tr->array_buffer);
1981
1982 #ifdef CONFIG_TRACER_MAX_TRACE
1983 if (type->use_max_tr) {
1984 tr->allocated_snapshot = false;
1985
1986 /* Shrink the max buffer again */
1987 if (ring_buffer_expanded)
1988 ring_buffer_resize(tr->max_buffer.buffer, 1,
1989 RING_BUFFER_ALL_CPUS);
1990 }
1991 #endif
1992
1993 printk(KERN_CONT "PASSED\n");
1994 return 0;
1995 }
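
/*
 * Illustrative sketch: how a tracer opts into the selftest run above.
 * The names my_selftest/my_tracer are hypothetical; real tracers use
 * the helpers in trace_selftest.c.
 *
 *	#ifdef CONFIG_FTRACE_STARTUP_TEST
 *	static int my_selftest(struct tracer *trace, struct trace_array *tr)
 *	{
 *		// enable the tracer, generate some events, verify the buffer
 *		return 0;	// non-zero prints FAILED and blocks registration
 *	}
 *	#endif
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name		= "my_tracer",
 *	#ifdef CONFIG_FTRACE_STARTUP_TEST
 *		.selftest	= my_selftest,
 *	#endif
 *	};
 */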
1996
1997 static __init int init_trace_selftests(void)
1998 {
1999 struct trace_selftests *p, *n;
2000 struct tracer *t, **last;
2001 int ret;
2002
2003 selftests_can_run = true;
2004
2005 mutex_lock(&trace_types_lock);
2006
2007 if (list_empty(&postponed_selftests))
2008 goto out;
2009
2010 pr_info("Running postponed tracer tests:\n");
2011
2012 tracing_selftest_running = true;
2013 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2014 /* This loop can take minutes when sanitizers are enabled, so
2015 * let's make sure we allow RCU processing.
2016 */
2017 cond_resched();
2018 ret = run_tracer_selftest(p->type);
2019 /* If the test fails, then warn and remove from available_tracers */
2020 if (ret < 0) {
2021 WARN(1, "tracer: %s failed selftest, disabling\n",
2022 p->type->name);
2023 last = &trace_types;
2024 for (t = trace_types; t; t = t->next) {
2025 if (t == p->type) {
2026 *last = t->next;
2027 break;
2028 }
2029 last = &t->next;
2030 }
2031 }
2032 list_del(&p->list);
2033 kfree(p);
2034 }
2035 tracing_selftest_running = false;
2036
2037 out:
2038 mutex_unlock(&trace_types_lock);
2039
2040 return 0;
2041 }
2042 core_initcall(init_trace_selftests);
2043 #else
2044 static inline int run_tracer_selftest(struct tracer *type)
2045 {
2046 return 0;
2047 }
2048 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2049
2050 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2051
2052 static void __init apply_trace_boot_options(void);
2053
2054 /**
2055 * register_tracer - register a tracer with the ftrace system.
2056 * @type: the plugin for the tracer
2057 *
2058 * Register a new plugin tracer.
2059 */
2060 int __init register_tracer(struct tracer *type)
2061 {
2062 struct tracer *t;
2063 int ret = 0;
2064
2065 if (!type->name) {
2066 pr_info("Tracer must have a name\n");
2067 return -1;
2068 }
2069
2070 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2071 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2072 return -1;
2073 }
2074
2075 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2076 pr_warn("Can not register tracer %s due to lockdown\n",
2077 type->name);
2078 return -EPERM;
2079 }
2080
2081 mutex_lock(&trace_types_lock);
2082
2083 tracing_selftest_running = true;
2084
2085 for (t = trace_types; t; t = t->next) {
2086 if (strcmp(type->name, t->name) == 0) {
2087 /* already found */
2088 pr_info("Tracer %s already registered\n",
2089 type->name);
2090 ret = -1;
2091 goto out;
2092 }
2093 }
2094
2095 if (!type->set_flag)
2096 type->set_flag = &dummy_set_flag;
2097 if (!type->flags) {
2098 /* allocate a dummy tracer_flags */
2099 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2100 if (!type->flags) {
2101 ret = -ENOMEM;
2102 goto out;
2103 }
2104 type->flags->val = 0;
2105 type->flags->opts = dummy_tracer_opt;
2106 } else
2107 if (!type->flags->opts)
2108 type->flags->opts = dummy_tracer_opt;
2109
2110 /* store the tracer for __set_tracer_option */
2111 type->flags->trace = type;
2112
2113 ret = run_tracer_selftest(type);
2114 if (ret < 0)
2115 goto out;
2116
2117 type->next = trace_types;
2118 trace_types = type;
2119 add_tracer_options(&global_trace, type);
2120
2121 out:
2122 tracing_selftest_running = false;
2123 mutex_unlock(&trace_types_lock);
2124
2125 if (ret || !default_bootup_tracer)
2126 goto out_unlock;
2127
2128 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2129 goto out_unlock;
2130
2131 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2132 /* Do we want this tracer to start on bootup? */
2133 tracing_set_tracer(&global_trace, type->name);
2134 default_bootup_tracer = NULL;
2135
2136 apply_trace_boot_options();
2137
2138 /* disable other selftests, since this will break it. */
2139 disable_tracing_selftest("running a tracer");
2140
2141 out_unlock:
2142 return ret;
2143 }
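
/*
 * Illustrative sketch of the usual registration pattern for a built-in
 * tracer (my_tracer and its handlers are hypothetical). Since
 * register_tracer() is __init, this must run at boot, not from a module:
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int init_my_tracer(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(init_my_tracer);
 */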
2144
2145 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2146 {
2147 struct trace_buffer *buffer = buf->buffer;
2148
2149 if (!buffer)
2150 return;
2151
2152 ring_buffer_record_disable(buffer);
2153
2154 /* Make sure all commits have finished */
2155 synchronize_rcu();
2156 ring_buffer_reset_cpu(buffer, cpu);
2157
2158 ring_buffer_record_enable(buffer);
2159 }
2160
2161 void tracing_reset_online_cpus(struct array_buffer *buf)
2162 {
2163 struct trace_buffer *buffer = buf->buffer;
2164
2165 if (!buffer)
2166 return;
2167
2168 ring_buffer_record_disable(buffer);
2169
2170 /* Make sure all commits have finished */
2171 synchronize_rcu();
2172
2173 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2174
2175 ring_buffer_reset_online_cpus(buffer);
2176
2177 ring_buffer_record_enable(buffer);
2178 }
2179
2180 /* Must have trace_types_lock held */
2181 void tracing_reset_all_online_cpus(void)
2182 {
2183 struct trace_array *tr;
2184
2185 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2186 if (!tr->clear_trace)
2187 continue;
2188 tr->clear_trace = false;
2189 tracing_reset_online_cpus(&tr->array_buffer);
2190 #ifdef CONFIG_TRACER_MAX_TRACE
2191 tracing_reset_online_cpus(&tr->max_buffer);
2192 #endif
2193 }
2194 }
2195
2196 /*
2197 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2198 * is the tgid last observed corresponding to pid=i.
2199 */
2200 static int *tgid_map;
2201
2202 /* The maximum valid index into tgid_map. */
2203 static size_t tgid_map_max;
2204
2205 #define SAVED_CMDLINES_DEFAULT 128
2206 #define NO_CMDLINE_MAP UINT_MAX
2207 /*
2208 * Preemption must be disabled before acquiring trace_cmdline_lock.
2209 * The various trace_arrays' max_lock must be acquired in a context
2210 * where interrupt is disabled.
2211 */
2212 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2213 struct saved_cmdlines_buffer {
2214 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2215 unsigned *map_cmdline_to_pid;
2216 unsigned cmdline_num;
2217 int cmdline_idx;
2218 char *saved_cmdlines;
2219 };
2220 static struct saved_cmdlines_buffer *savedcmd;
2221
2222 static inline char *get_saved_cmdlines(int idx)
2223 {
2224 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2225 }
2226
2227 static inline void set_cmdline(int idx, const char *cmdline)
2228 {
2229 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2230 }
2231
2232 static int allocate_cmdlines_buffer(unsigned int val,
2233 struct saved_cmdlines_buffer *s)
2234 {
2235 s->map_cmdline_to_pid = kmalloc_array(val,
2236 sizeof(*s->map_cmdline_to_pid),
2237 GFP_KERNEL);
2238 if (!s->map_cmdline_to_pid)
2239 return -ENOMEM;
2240
2241 s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2242 if (!s->saved_cmdlines) {
2243 kfree(s->map_cmdline_to_pid);
2244 return -ENOMEM;
2245 }
2246
2247 s->cmdline_idx = 0;
2248 s->cmdline_num = val;
2249 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2250 sizeof(s->map_pid_to_cmdline));
2251 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2252 val * sizeof(*s->map_cmdline_to_pid));
2253
2254 return 0;
2255 }
2256
2257 static int trace_create_savedcmd(void)
2258 {
2259 int ret;
2260
2261 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2262 if (!savedcmd)
2263 return -ENOMEM;
2264
2265 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2266 if (ret < 0) {
2267 kfree(savedcmd);
2268 savedcmd = NULL;
2269 return -ENOMEM;
2270 }
2271
2272 return 0;
2273 }
2274
2275 int is_tracing_stopped(void)
2276 {
2277 return global_trace.stop_count;
2278 }
2279
2280 /**
2281 * tracing_start - quick start of the tracer
2282 *
2283 * If tracing is enabled but was stopped by tracing_stop,
2284 * this will start the tracer back up.
2285 */
2286 void tracing_start(void)
2287 {
2288 struct trace_buffer *buffer;
2289 unsigned long flags;
2290
2291 if (tracing_disabled)
2292 return;
2293
2294 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2295 if (--global_trace.stop_count) {
2296 if (global_trace.stop_count < 0) {
2297 /* Someone screwed up their debugging */
2298 WARN_ON_ONCE(1);
2299 global_trace.stop_count = 0;
2300 }
2301 goto out;
2302 }
2303
2304 /* Prevent the buffers from switching */
2305 arch_spin_lock(&global_trace.max_lock);
2306
2307 buffer = global_trace.array_buffer.buffer;
2308 if (buffer)
2309 ring_buffer_record_enable(buffer);
2310
2311 #ifdef CONFIG_TRACER_MAX_TRACE
2312 buffer = global_trace.max_buffer.buffer;
2313 if (buffer)
2314 ring_buffer_record_enable(buffer);
2315 #endif
2316
2317 arch_spin_unlock(&global_trace.max_lock);
2318
2319 out:
2320 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2321 }
2322
2323 static void tracing_start_tr(struct trace_array *tr)
2324 {
2325 struct trace_buffer *buffer;
2326 unsigned long flags;
2327
2328 if (tracing_disabled)
2329 return;
2330
2331 /* If global, we need to also start the max tracer */
2332 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2333 return tracing_start();
2334
2335 raw_spin_lock_irqsave(&tr->start_lock, flags);
2336
2337 if (--tr->stop_count) {
2338 if (tr->stop_count < 0) {
2339 /* Someone screwed up their debugging */
2340 WARN_ON_ONCE(1);
2341 tr->stop_count = 0;
2342 }
2343 goto out;
2344 }
2345
2346 buffer = tr->array_buffer.buffer;
2347 if (buffer)
2348 ring_buffer_record_enable(buffer);
2349
2350 out:
2351 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2352 }
2353
2354 /**
2355 * tracing_stop - quick stop of the tracer
2356 *
2357 * Light weight way to stop tracing. Use in conjunction with
2358 * tracing_start.
2359 */
2360 void tracing_stop(void)
2361 {
2362 struct trace_buffer *buffer;
2363 unsigned long flags;
2364
2365 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2366 if (global_trace.stop_count++)
2367 goto out;
2368
2369 /* Prevent the buffers from switching */
2370 arch_spin_lock(&global_trace.max_lock);
2371
2372 buffer = global_trace.array_buffer.buffer;
2373 if (buffer)
2374 ring_buffer_record_disable(buffer);
2375
2376 #ifdef CONFIG_TRACER_MAX_TRACE
2377 buffer = global_trace.max_buffer.buffer;
2378 if (buffer)
2379 ring_buffer_record_disable(buffer);
2380 #endif
2381
2382 arch_spin_unlock(&global_trace.max_lock);
2383
2384 out:
2385 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2386 }
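
/*
 * Illustrative sketch: tracing_stop()/tracing_start() nest via
 * stop_count, so a caller that needs the buffers quiet simply brackets
 * the region (dump_state() is a stand-in for real users such as kgdb):
 *
 *	tracing_stop();
 *	dump_state();	// buffers keep their contents, new writes are ignored
 *	tracing_start();
 */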
2387
2388 static void tracing_stop_tr(struct trace_array *tr)
2389 {
2390 struct trace_buffer *buffer;
2391 unsigned long flags;
2392
2393 /* If global, we need to also stop the max tracer */
2394 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2395 return tracing_stop();
2396
2397 raw_spin_lock_irqsave(&tr->start_lock, flags);
2398 if (tr->stop_count++)
2399 goto out;
2400
2401 buffer = tr->array_buffer.buffer;
2402 if (buffer)
2403 ring_buffer_record_disable(buffer);
2404
2405 out:
2406 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2407 }
2408
2409 static int trace_save_cmdline(struct task_struct *tsk)
2410 {
2411 unsigned tpid, idx;
2412
2413 /* treat recording of idle task as a success */
2414 if (!tsk->pid)
2415 return 1;
2416
2417 tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2418
2419 /*
2420 * It's not the end of the world if we don't get
2421 * the lock, but we also don't want to spin
2422 * nor do we want to disable interrupts,
2423 * so if we miss here, then better luck next time.
2424 *
2425 * This is called from within the scheduler and wakeup paths, so
2426 * interrupts had better be disabled and the run queue lock held.
2427 */
2428 lockdep_assert_preemption_disabled();
2429 if (!arch_spin_trylock(&trace_cmdline_lock))
2430 return 0;
2431
2432 idx = savedcmd->map_pid_to_cmdline[tpid];
2433 if (idx == NO_CMDLINE_MAP) {
2434 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2435
2436 savedcmd->map_pid_to_cmdline[tpid] = idx;
2437 savedcmd->cmdline_idx = idx;
2438 }
2439
2440 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2441 set_cmdline(idx, tsk->comm);
2442
2443 arch_spin_unlock(&trace_cmdline_lock);
2444
2445 return 1;
2446 }
2447
2448 static void __trace_find_cmdline(int pid, char comm[])
2449 {
2450 unsigned map;
2451 int tpid;
2452
2453 if (!pid) {
2454 strcpy(comm, "<idle>");
2455 return;
2456 }
2457
2458 if (WARN_ON_ONCE(pid < 0)) {
2459 strcpy(comm, "<XXX>");
2460 return;
2461 }
2462
2463 tpid = pid & (PID_MAX_DEFAULT - 1);
2464 map = savedcmd->map_pid_to_cmdline[tpid];
2465 if (map != NO_CMDLINE_MAP) {
2466 tpid = savedcmd->map_cmdline_to_pid[map];
2467 if (tpid == pid) {
2468 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2469 return;
2470 }
2471 }
2472 strcpy(comm, "<...>");
2473 }
2474
2475 void trace_find_cmdline(int pid, char comm[])
2476 {
2477 preempt_disable();
2478 arch_spin_lock(&trace_cmdline_lock);
2479
2480 __trace_find_cmdline(pid, comm);
2481
2482 arch_spin_unlock(&trace_cmdline_lock);
2483 preempt_enable();
2484 }
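
/*
 * Illustrative sketch of the lookup side, as the output code uses it
 * (entry is a struct trace_entry read from the buffer; "<...>" means
 * the cmdline entry has since been recycled):
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%16s-%-7d", comm, entry->pid);
 */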
2485
2486 static int *trace_find_tgid_ptr(int pid)
2487 {
2488 /*
2489 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2490 * if we observe a non-NULL tgid_map then we also observe the correct
2491 * tgid_map_max.
2492 */
2493 int *map = smp_load_acquire(&tgid_map);
2494
2495 if (unlikely(!map || pid > tgid_map_max))
2496 return NULL;
2497
2498 return &map[pid];
2499 }
2500
2501 int trace_find_tgid(int pid)
2502 {
2503 int *ptr = trace_find_tgid_ptr(pid);
2504
2505 return ptr ? *ptr : 0;
2506 }
2507
2508 static int trace_save_tgid(struct task_struct *tsk)
2509 {
2510 int *ptr;
2511
2512 /* treat recording of idle task as a success */
2513 if (!tsk->pid)
2514 return 1;
2515
2516 ptr = trace_find_tgid_ptr(tsk->pid);
2517 if (!ptr)
2518 return 0;
2519
2520 *ptr = tsk->tgid;
2521 return 1;
2522 }
2523
2524 static bool tracing_record_taskinfo_skip(int flags)
2525 {
2526 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2527 return true;
2528 if (!__this_cpu_read(trace_taskinfo_save))
2529 return true;
2530 return false;
2531 }
2532
2533 /**
2534 * tracing_record_taskinfo - record the task info of a task
2535 *
2536 * @task: task to record
2537 * @flags: TRACE_RECORD_CMDLINE for recording comm
2538 * TRACE_RECORD_TGID for recording tgid
2539 */
2540 void tracing_record_taskinfo(struct task_struct *task, int flags)
2541 {
2542 bool done;
2543
2544 if (tracing_record_taskinfo_skip(flags))
2545 return;
2546
2547 /*
2548 * Record as much task information as possible. If some fail, continue
2549 * to try to record the others.
2550 */
2551 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2552 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2553
2554 /* If recording any information failed, retry again soon. */
2555 if (!done)
2556 return;
2557
2558 __this_cpu_write(trace_taskinfo_save, false);
2559 }
2560
2561 /**
2562 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2563 *
2564 * @prev: previous task during sched_switch
2565 * @next: next task during sched_switch
2566 * @flags: TRACE_RECORD_CMDLINE for recording comm
2567 * TRACE_RECORD_TGID for recording tgid
2568 */
2569 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2570 struct task_struct *next, int flags)
2571 {
2572 bool done;
2573
2574 if (tracing_record_taskinfo_skip(flags))
2575 return;
2576
2577 /*
2578 * Record as much task information as possible. If some fail, continue
2579 * to try to record the others.
2580 */
2581 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2582 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2583 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2584 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2585
2586 /* If recording any information failed, retry again soon. */
2587 if (!done)
2588 return;
2589
2590 __this_cpu_write(trace_taskinfo_save, false);
2591 }
2592
2593 /* Helpers to record a specific task information */
2594 void tracing_record_cmdline(struct task_struct *task)
2595 {
2596 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2597 }
2598
2599 void tracing_record_tgid(struct task_struct *task)
2600 {
2601 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2602 }
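
/*
 * Illustrative sketch: callers that want both pieces of task info pass
 * both flags in one call; the per-cpu trace_taskinfo_save flag keeps
 * this cheap when there is nothing new to record:
 *
 *	tracing_record_taskinfo(current,
 *				TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID);
 */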
2603
2604 /*
2605 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2606 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2607 * simplifies those functions and keeps them in sync.
2608 */
2609 enum print_line_t trace_handle_return(struct trace_seq *s)
2610 {
2611 return trace_seq_has_overflowed(s) ?
2612 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2613 }
2614 EXPORT_SYMBOL_GPL(trace_handle_return);
2615
2616 void
2617 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2618 unsigned long flags, int pc)
2619 {
2620 struct task_struct *tsk = current;
2621
2622 entry->preempt_count = pc & 0xff;
2623 entry->pid = (tsk) ? tsk->pid : 0;
2624 entry->type = type;
2625 entry->flags =
2626 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2627 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2628 #else
2629 TRACE_FLAG_IRQS_NOSUPPORT |
2630 #endif
2631 ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
2632 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2633 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2634 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2635 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2636 }
2637 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2638
2639 struct ring_buffer_event *
2640 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2641 int type,
2642 unsigned long len,
2643 unsigned long flags, int pc)
2644 {
2645 return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2646 }
2647
2648 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2649 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2650 static int trace_buffered_event_ref;
2651
2652 /**
2653 * trace_buffered_event_enable - enable buffering events
2654 *
2655 * When events are being filtered, it is quicker to use a temporary
2656 * buffer to write the event data into if there's a likely chance
2657 * that it will not be committed. The discard of the ring buffer
2658 * is not as fast as committing, and is much slower than copying
2659 * a commit.
2660 *
2661 * When an event is to be filtered, allocate per cpu buffers to
2662 * write the event data into, and if the event is filtered and discarded
2663 * it is simply dropped, otherwise, the entire data is to be committed
2664 * in one shot.
2665 */
2666 void trace_buffered_event_enable(void)
2667 {
2668 struct ring_buffer_event *event;
2669 struct page *page;
2670 int cpu;
2671
2672 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2673
2674 if (trace_buffered_event_ref++)
2675 return;
2676
2677 for_each_tracing_cpu(cpu) {
2678 page = alloc_pages_node(cpu_to_node(cpu),
2679 GFP_KERNEL | __GFP_NORETRY, 0);
2680 if (!page)
2681 goto failed;
2682
2683 event = page_address(page);
2684 memset(event, 0, sizeof(*event));
2685
2686 per_cpu(trace_buffered_event, cpu) = event;
2687
2688 preempt_disable();
2689 if (cpu == smp_processor_id() &&
2690 __this_cpu_read(trace_buffered_event) !=
2691 per_cpu(trace_buffered_event, cpu))
2692 WARN_ON_ONCE(1);
2693 preempt_enable();
2694 }
2695
2696 return;
2697 failed:
2698 trace_buffered_event_disable();
2699 }
2700
2701 static void enable_trace_buffered_event(void *data)
2702 {
2703 /* Probably not needed, but do it anyway */
2704 smp_rmb();
2705 this_cpu_dec(trace_buffered_event_cnt);
2706 }
2707
2708 static void disable_trace_buffered_event(void *data)
2709 {
2710 this_cpu_inc(trace_buffered_event_cnt);
2711 }
2712
2713 /**
2714 * trace_buffered_event_disable - disable buffering events
2715 *
2716 * When a filter is removed, it is faster to not use the buffered
2717 * events, and to commit directly into the ring buffer. Free up
2718 * the temp buffers when there are no more users. This requires
2719 * special synchronization with current events.
2720 */
2721 void trace_buffered_event_disable(void)
2722 {
2723 int cpu;
2724
2725 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2726
2727 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2728 return;
2729
2730 if (--trace_buffered_event_ref)
2731 return;
2732
2733 preempt_disable();
2734 /* For each CPU, set the buffer as used. */
2735 smp_call_function_many(tracing_buffer_mask,
2736 disable_trace_buffered_event, NULL, 1);
2737 preempt_enable();
2738
2739 /* Wait for all current users to finish */
2740 synchronize_rcu();
2741
2742 for_each_tracing_cpu(cpu) {
2743 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2744 per_cpu(trace_buffered_event, cpu) = NULL;
2745 }
2746 /*
2747 * Make sure trace_buffered_event is NULL before clearing
2748 * trace_buffered_event_cnt.
2749 */
2750 smp_wmb();
2751
2752 preempt_disable();
2753 /* Do the work on each cpu */
2754 smp_call_function_many(tracing_buffer_mask,
2755 enable_trace_buffered_event, NULL, 1);
2756 preempt_enable();
2757 }
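
/*
 * Illustrative sketch: the event filter code pairs the two calls above
 * around the lifetime of a filter, always under event_mutex (which both
 * functions assert):
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();		// first filter added
 *	...
 *	trace_buffered_event_disable();		// last filter removed
 *	mutex_unlock(&event_mutex);
 */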
2758
2759 static struct trace_buffer *temp_buffer;
2760
2761 struct ring_buffer_event *
2762 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2763 struct trace_event_file *trace_file,
2764 int type, unsigned long len,
2765 unsigned long flags, int pc)
2766 {
2767 struct ring_buffer_event *entry;
2768 int val;
2769
2770 *current_rb = trace_file->tr->array_buffer.buffer;
2771
2772 if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2773 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2774 (entry = this_cpu_read(trace_buffered_event))) {
2775 /* Try to use the per cpu buffer first */
2776 val = this_cpu_inc_return(trace_buffered_event_cnt);
2777 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2778 trace_event_setup(entry, type, flags, pc);
2779 entry->array[0] = len;
2780 return entry;
2781 }
2782 this_cpu_dec(trace_buffered_event_cnt);
2783 }
2784
2785 entry = __trace_buffer_lock_reserve(*current_rb,
2786 type, len, flags, pc);
2787 /*
2788 * If tracing is off, but we have triggers enabled
2789 * we still need to look at the event data. Use the temp_buffer
2790 * to store the trace event for the trigger to use. It's recursive
2791 * safe and will not be recorded anywhere.
2792 */
2793 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2794 *current_rb = temp_buffer;
2795 entry = __trace_buffer_lock_reserve(*current_rb,
2796 type, len, flags, pc);
2797 }
2798 return entry;
2799 }
2800 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2801
2802 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2803 static DEFINE_MUTEX(tracepoint_printk_mutex);
2804
2805 static void output_printk(struct trace_event_buffer *fbuffer)
2806 {
2807 struct trace_event_call *event_call;
2808 struct trace_event_file *file;
2809 struct trace_event *event;
2810 unsigned long flags;
2811 struct trace_iterator *iter = tracepoint_print_iter;
2812
2813 /* We should never get here if iter is NULL */
2814 if (WARN_ON_ONCE(!iter))
2815 return;
2816
2817 event_call = fbuffer->trace_file->event_call;
2818 if (!event_call || !event_call->event.funcs ||
2819 !event_call->event.funcs->trace)
2820 return;
2821
2822 file = fbuffer->trace_file;
2823 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2824 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2825 !filter_match_preds(file->filter, fbuffer->entry)))
2826 return;
2827
2828 event = &fbuffer->trace_file->event_call->event;
2829
2830 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2831 trace_seq_init(&iter->seq);
2832 iter->ent = fbuffer->entry;
2833 event_call->event.funcs->trace(iter, 0, event);
2834 trace_seq_putc(&iter->seq, 0);
2835 printk("%s", iter->seq.buffer);
2836
2837 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2838 }
2839
2840 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2841 void *buffer, size_t *lenp,
2842 loff_t *ppos)
2843 {
2844 int save_tracepoint_printk;
2845 int ret;
2846
2847 mutex_lock(&tracepoint_printk_mutex);
2848 save_tracepoint_printk = tracepoint_printk;
2849
2850 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2851
2852 /*
2853 * This will force exiting early, as tracepoint_printk
2854 * is always zero when tracepoint_print_iter is not allocated
2855 */
2856 if (!tracepoint_print_iter)
2857 tracepoint_printk = 0;
2858
2859 if (save_tracepoint_printk == tracepoint_printk)
2860 goto out;
2861
2862 if (tracepoint_printk)
2863 static_key_enable(&tracepoint_printk_key.key);
2864 else
2865 static_key_disable(&tracepoint_printk_key.key);
2866
2867 out:
2868 mutex_unlock(&tracepoint_printk_mutex);
2869
2870 return ret;
2871 }
2872
2873 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2874 {
2875 if (static_key_false(&tracepoint_printk_key.key))
2876 output_printk(fbuffer);
2877
2878 if (static_branch_unlikely(&trace_event_exports_enabled))
2879 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2880 event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2881 fbuffer->event, fbuffer->entry,
2882 fbuffer->flags, fbuffer->pc, fbuffer->regs);
2883 }
2884 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2885
2886 /*
2887 * Skip 3:
2888 *
2889 * trace_buffer_unlock_commit_regs()
2890 * trace_event_buffer_commit()
2891 * trace_event_raw_event_xxx()
2892 */
2893 # define STACK_SKIP 3
2894
2895 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2896 struct trace_buffer *buffer,
2897 struct ring_buffer_event *event,
2898 unsigned long flags, int pc,
2899 struct pt_regs *regs)
2900 {
2901 __buffer_unlock_commit(buffer, event);
2902
2903 /*
2904 * If regs is not set, then skip the necessary functions.
2905 * Note, we can still get here via blktrace, wakeup tracer
2906 * and mmiotrace, but that's ok if they lose a function or
2907 * two. They are not that meaningful.
2908 */
2909 ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2910 ftrace_trace_userstack(tr, buffer, flags, pc);
2911 }
2912
2913 /*
2914 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2915 */
2916 void
2917 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2918 struct ring_buffer_event *event)
2919 {
2920 __buffer_unlock_commit(buffer, event);
2921 }
2922
2923 void
2924 trace_function(struct trace_array *tr,
2925 unsigned long ip, unsigned long parent_ip, unsigned long flags,
2926 int pc)
2927 {
2928 struct trace_event_call *call = &event_function;
2929 struct trace_buffer *buffer = tr->array_buffer.buffer;
2930 struct ring_buffer_event *event;
2931 struct ftrace_entry *entry;
2932
2933 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2934 flags, pc);
2935 if (!event)
2936 return;
2937 entry = ring_buffer_event_data(event);
2938 entry->ip = ip;
2939 entry->parent_ip = parent_ip;
2940
2941 if (!call_filter_check_discard(call, entry, buffer, event)) {
2942 if (static_branch_unlikely(&trace_function_exports_enabled))
2943 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2944 __buffer_unlock_commit(buffer, event);
2945 }
2946 }
2947
2948 #ifdef CONFIG_STACKTRACE
2949
2950 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2951 #define FTRACE_KSTACK_NESTING 4
2952
2953 #define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2954
2955 struct ftrace_stack {
2956 unsigned long calls[FTRACE_KSTACK_ENTRIES];
2957 };
2958
2959
2960 struct ftrace_stacks {
2961 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
2962 };
2963
2964 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2965 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2966
2967 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2968 unsigned long flags,
2969 int skip, int pc, struct pt_regs *regs)
2970 {
2971 struct trace_event_call *call = &event_kernel_stack;
2972 struct ring_buffer_event *event;
2973 unsigned int size, nr_entries;
2974 struct ftrace_stack *fstack;
2975 struct stack_entry *entry;
2976 int stackidx;
2977
2978 /*
2979 * Add one, for this function and the call to save_stack_trace().
2980 * If regs is set, then these functions will not be in the way.
2981 */
2982 #ifndef CONFIG_UNWINDER_ORC
2983 if (!regs)
2984 skip++;
2985 #endif
2986
2987 preempt_disable_notrace();
2988
2989 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2990
2991 /* This should never happen. If it does, yell once and skip */
2992 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2993 goto out;
2994
2995 /*
2996 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2997 * interrupt will either see the value pre increment or post
2998 * increment. If the interrupt happens pre increment it will have
2999 * restored the counter when it returns. We just need a barrier to
3000 * keep gcc from moving things around.
3001 */
3002 barrier();
3003
3004 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3005 size = ARRAY_SIZE(fstack->calls);
3006
3007 if (regs) {
3008 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3009 size, skip);
3010 } else {
3011 nr_entries = stack_trace_save(fstack->calls, size, skip);
3012 }
3013
3014 size = nr_entries * sizeof(unsigned long);
3015 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3016 (sizeof(*entry) - sizeof(entry->caller)) + size,
3017 flags, pc);
3018 if (!event)
3019 goto out;
3020 entry = ring_buffer_event_data(event);
3021
3022 memcpy(&entry->caller, fstack->calls, size);
3023 entry->size = nr_entries;
3024
3025 if (!call_filter_check_discard(call, entry, buffer, event))
3026 __buffer_unlock_commit(buffer, event);
3027
3028 out:
3029 /* Again, don't let gcc optimize things here */
3030 barrier();
3031 __this_cpu_dec(ftrace_stack_reserve);
3032 preempt_enable_notrace();
3033
3034 }
3035
3036 static inline void ftrace_trace_stack(struct trace_array *tr,
3037 struct trace_buffer *buffer,
3038 unsigned long flags,
3039 int skip, int pc, struct pt_regs *regs)
3040 {
3041 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3042 return;
3043
3044 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
3045 }
3046
3047 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
3048 int pc)
3049 {
3050 struct trace_buffer *buffer = tr->array_buffer.buffer;
3051
3052 if (rcu_is_watching()) {
3053 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3054 return;
3055 }
3056
3057 /*
3058 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3059 * but if the above rcu_is_watching() failed, then the NMI
3060 * triggered someplace critical, and rcu_irq_enter() should
3061 * not be called from NMI.
3062 */
3063 if (unlikely(in_nmi()))
3064 return;
3065
3066 rcu_irq_enter_irqson();
3067 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3068 rcu_irq_exit_irqson();
3069 }
3070
3071 /**
3072 * trace_dump_stack - record a stack back trace in the trace buffer
3073 * @skip: Number of functions to skip (helper handlers)
3074 */
3075 void trace_dump_stack(int skip)
3076 {
3077 unsigned long flags;
3078
3079 if (tracing_disabled || tracing_selftest_running)
3080 return;
3081
3082 local_save_flags(flags);
3083
3084 #ifndef CONFIG_UNWINDER_ORC
3085 /* Skip 1 to skip this function. */
3086 skip++;
3087 #endif
3088 __ftrace_trace_stack(global_trace.array_buffer.buffer,
3089 flags, skip, preempt_count(), NULL);
3090 }
3091 EXPORT_SYMBOL_GPL(trace_dump_stack);
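
/*
 * Illustrative sketch: a debugging site that wants the current call
 * chain recorded in the trace (rather than printed to dmesg) just does:
 *
 *	trace_dump_stack(0);	// 0: don't skip any additional callers
 *
 * and the backtrace shows up as a kernel stack event in the output.
 */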
3092
3093 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3094 static DEFINE_PER_CPU(int, user_stack_count);
3095
3096 static void
3097 ftrace_trace_userstack(struct trace_array *tr,
3098 struct trace_buffer *buffer, unsigned long flags, int pc)
3099 {
3100 struct trace_event_call *call = &event_user_stack;
3101 struct ring_buffer_event *event;
3102 struct userstack_entry *entry;
3103
3104 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3105 return;
3106
3107 /*
3108 * NMIs can not handle page faults, even with fixups.
3109 * Saving the user stack can (and often does) fault.
3110 */
3111 if (unlikely(in_nmi()))
3112 return;
3113
3114 /*
3115 * prevent recursion, since the user stack tracing may
3116 * trigger other kernel events.
3117 */
3118 preempt_disable();
3119 if (__this_cpu_read(user_stack_count))
3120 goto out;
3121
3122 __this_cpu_inc(user_stack_count);
3123
3124 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3125 sizeof(*entry), flags, pc);
3126 if (!event)
3127 goto out_drop_count;
3128 entry = ring_buffer_event_data(event);
3129
3130 entry->tgid = current->tgid;
3131 memset(&entry->caller, 0, sizeof(entry->caller));
3132
3133 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3134 if (!call_filter_check_discard(call, entry, buffer, event))
3135 __buffer_unlock_commit(buffer, event);
3136
3137 out_drop_count:
3138 __this_cpu_dec(user_stack_count);
3139 out:
3140 preempt_enable();
3141 }
3142 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3143 static void ftrace_trace_userstack(struct trace_array *tr,
3144 struct trace_buffer *buffer,
3145 unsigned long flags, int pc)
3146 {
3147 }
3148 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3149
3150 #endif /* CONFIG_STACKTRACE */
3151
3152 /* created for use with alloc_percpu */
3153 struct trace_buffer_struct {
3154 int nesting;
3155 char buffer[4][TRACE_BUF_SIZE];
3156 };
3157
3158 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3159
3160 /*
3161 * This allows for lockless recording. If we're nested too deeply, then
3162 * this returns NULL.
3163 */
3164 static char *get_trace_buf(void)
3165 {
3166 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3167
3168 if (!trace_percpu_buffer || buffer->nesting >= 4)
3169 return NULL;
3170
3171 buffer->nesting++;
3172
3173 /* Interrupts must see nesting incremented before we use the buffer */
3174 barrier();
3175 return &buffer->buffer[buffer->nesting - 1][0];
3176 }
3177
3178 static void put_trace_buf(void)
3179 {
3180 /* Don't let the decrement of nesting leak before this */
3181 barrier();
3182 this_cpu_dec(trace_percpu_buffer->nesting);
3183 }
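
/*
 * Illustrative sketch of the expected pairing: every successful
 * get_trace_buf() must be matched by put_trace_buf() on the same CPU,
 * with preemption disabled across the window, as trace_vbprintk()
 * below does:
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		// format at most TRACE_BUF_SIZE bytes into buf
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */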
3184
3185 static int alloc_percpu_trace_buffer(void)
3186 {
3187 struct trace_buffer_struct __percpu *buffers;
3188
3189 if (trace_percpu_buffer)
3190 return 0;
3191
3192 buffers = alloc_percpu(struct trace_buffer_struct);
3193 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3194 return -ENOMEM;
3195
3196 trace_percpu_buffer = buffers;
3197 return 0;
3198 }
3199
3200 static int buffers_allocated;
3201
3202 void trace_printk_init_buffers(void)
3203 {
3204 if (buffers_allocated)
3205 return;
3206
3207 if (alloc_percpu_trace_buffer())
3208 return;
3209
3210 /* trace_printk() is for debug use only. Don't use it in production. */
3211
3212 pr_warn("\n");
3213 pr_warn("**********************************************************\n");
3214 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3215 pr_warn("** **\n");
3216 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3217 pr_warn("** **\n");
3218 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3219 pr_warn("** unsafe for production use. **\n");
3220 pr_warn("** **\n");
3221 pr_warn("** If you see this message and you are not debugging **\n");
3222 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3223 pr_warn("** **\n");
3224 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3225 pr_warn("**********************************************************\n");
3226
3227 /* Expand the buffers to set size */
3228 tracing_update_buffers();
3229
3230 buffers_allocated = 1;
3231
3232 /*
3233 * trace_printk_init_buffers() can be called by modules.
3234 * If that happens, then we need to start cmdline recording
3235 * directly here. If the global_trace.buffer is already
3236 * allocated here, then this was called by module code.
3237 */
3238 if (global_trace.array_buffer.buffer)
3239 tracing_start_cmdline_record();
3240 }
3241 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3242
3243 void trace_printk_start_comm(void)
3244 {
3245 /* Start tracing comms if trace printk is set */
3246 if (!buffers_allocated)
3247 return;
3248 tracing_start_cmdline_record();
3249 }
3250
3251 static void trace_printk_start_stop_comm(int enabled)
3252 {
3253 if (!buffers_allocated)
3254 return;
3255
3256 if (enabled)
3257 tracing_start_cmdline_record();
3258 else
3259 tracing_stop_cmdline_record();
3260 }
3261
3262 /**
3263 * trace_vbprintk - write binary msg to tracing buffer
3264 * @ip: The address of the caller
3265 * @fmt: The string format to write to the buffer
3266 * @args: Arguments for @fmt
3267 */
3268 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3269 {
3270 struct trace_event_call *call = &event_bprint;
3271 struct ring_buffer_event *event;
3272 struct trace_buffer *buffer;
3273 struct trace_array *tr = &global_trace;
3274 struct bprint_entry *entry;
3275 unsigned long flags;
3276 char *tbuffer;
3277 int len = 0, size, pc;
3278
3279 if (unlikely(tracing_selftest_running || tracing_disabled))
3280 return 0;
3281
3282 /* Don't pollute graph traces with trace_vprintk internals */
3283 pause_graph_tracing();
3284
3285 pc = preempt_count();
3286 preempt_disable_notrace();
3287
3288 tbuffer = get_trace_buf();
3289 if (!tbuffer) {
3290 len = 0;
3291 goto out_nobuffer;
3292 }
3293
3294 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3295
3296 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3297 goto out_put;
3298
3299 local_save_flags(flags);
3300 size = sizeof(*entry) + sizeof(u32) * len;
3301 buffer = tr->array_buffer.buffer;
3302 ring_buffer_nest_start(buffer);
3303 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3304 flags, pc);
3305 if (!event)
3306 goto out;
3307 entry = ring_buffer_event_data(event);
3308 entry->ip = ip;
3309 entry->fmt = fmt;
3310
3311 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3312 if (!call_filter_check_discard(call, entry, buffer, event)) {
3313 __buffer_unlock_commit(buffer, event);
3314 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3315 }
3316
3317 out:
3318 ring_buffer_nest_end(buffer);
3319 out_put:
3320 put_trace_buf();
3321
3322 out_nobuffer:
3323 preempt_enable_notrace();
3324 unpause_graph_tracing();
3325
3326 return len;
3327 }
3328 EXPORT_SYMBOL_GPL(trace_vbprintk);
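
/*
 * Illustrative sketch: the usual way into the path above is the
 * trace_printk() macro, which takes the binary (bprintk) fast path when
 * the format string is a constant (nr_packets is a stand-in for
 * whatever the debug site wants to log):
 *
 *	trace_printk("processed %d packets on cpu %d\n",
 *		     nr_packets, smp_processor_id());
 */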
3329
3330 __printf(3, 0)
3331 static int
3332 __trace_array_vprintk(struct trace_buffer *buffer,
3333 unsigned long ip, const char *fmt, va_list args)
3334 {
3335 struct trace_event_call *call = &event_print;
3336 struct ring_buffer_event *event;
3337 int len = 0, size, pc;
3338 struct print_entry *entry;
3339 unsigned long flags;
3340 char *tbuffer;
3341
3342 if (tracing_disabled || tracing_selftest_running)
3343 return 0;
3344
3345 /* Don't pollute graph traces with trace_vprintk internals */
3346 pause_graph_tracing();
3347
3348 pc = preempt_count();
3349 preempt_disable_notrace();
3350
3351
3352 tbuffer = get_trace_buf();
3353 if (!tbuffer) {
3354 len = 0;
3355 goto out_nobuffer;
3356 }
3357
3358 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3359
3360 local_save_flags(flags);
3361 size = sizeof(*entry) + len + 1;
3362 ring_buffer_nest_start(buffer);
3363 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3364 flags, pc);
3365 if (!event)
3366 goto out;
3367 entry = ring_buffer_event_data(event);
3368 entry->ip = ip;
3369
3370 memcpy(&entry->buf, tbuffer, len + 1);
3371 if (!call_filter_check_discard(call, entry, buffer, event)) {
3372 __buffer_unlock_commit(buffer, event);
3373 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3374 }
3375
3376 out:
3377 ring_buffer_nest_end(buffer);
3378 put_trace_buf();
3379
3380 out_nobuffer:
3381 preempt_enable_notrace();
3382 unpause_graph_tracing();
3383
3384 return len;
3385 }
3386
3387 __printf(3, 0)
3388 int trace_array_vprintk(struct trace_array *tr,
3389 unsigned long ip, const char *fmt, va_list args)
3390 {
3391 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3392 }
3393
3394 /**
3395 * trace_array_printk - Print a message to a specific instance
3396 * @tr: The instance trace_array descriptor
3397 * @ip: The instruction pointer that this is called from.
3398 * @fmt: The format to print (printf format)
3399 *
3400 * If a subsystem sets up its own instance, they have the right to
3401 * printk strings into their tracing instance buffer using this
3402 * function. Note, this function will not write into the top level
3403 * buffer (use trace_printk() for that), as writing into the top level
3404 * buffer should only have events that can be individually disabled.
3405 * trace_printk() is only used for debugging a kernel, and should not
3406 * ever be incorporated in normal use.
3407 *
3408 * trace_array_printk() can be used, as it will not add noise to the
3409 * top level tracing buffer.
3410 *
3411 * Note, trace_array_init_printk() must be called on @tr before this
3412 * can be used.
3413 */
3414 __printf(3, 0)
3415 int trace_array_printk(struct trace_array *tr,
3416 unsigned long ip, const char *fmt, ...)
3417 {
3418 int ret;
3419 va_list ap;
3420
3421 if (!tr)
3422 return -ENOENT;
3423
3424 /* This is only allowed for created instances */
3425 if (tr == &global_trace)
3426 return 0;
3427
3428 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3429 return 0;
3430
3431 va_start(ap, fmt);
3432 ret = trace_array_vprintk(tr, ip, fmt, ap);
3433 va_end(ap);
3434 return ret;
3435 }
3436 EXPORT_SYMBOL_GPL(trace_array_printk);
3437
3438 /**
3439 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3440 * @tr: The trace array to initialize the buffers for
3441 *
3442 * As trace_array_printk() only writes into instances, they are OK to
3443 * have in the kernel (unlike trace_printk()). This needs to be called
3444 * before trace_array_printk() can be used on a trace_array.
3445 */
3446 int trace_array_init_printk(struct trace_array *tr)
3447 {
3448 if (!tr)
3449 return -ENOENT;
3450
3451 /* This is only allowed for created instances */
3452 if (tr == &global_trace)
3453 return -EINVAL;
3454
3455 return alloc_percpu_trace_buffer();
3456 }
3457 EXPORT_SYMBOL_GPL(trace_array_init_printk);
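
/*
 * Illustrative sketch: a subsystem that wants its own buffer looks up
 * (or creates) an instance, initializes the printk buffers once, then
 * logs into it ("my_subsys" is a hypothetical instance name):
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_subsys");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "state = %d\n", state);
 */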
3458
3459 __printf(3, 4)
3460 int trace_array_printk_buf(struct trace_buffer *buffer,
3461 unsigned long ip, const char *fmt, ...)
3462 {
3463 int ret;
3464 va_list ap;
3465
3466 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3467 return 0;
3468
3469 va_start(ap, fmt);
3470 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3471 va_end(ap);
3472 return ret;
3473 }
3474
3475 __printf(2, 0)
3476 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3477 {
3478 return trace_array_vprintk(&global_trace, ip, fmt, args);
3479 }
3480 EXPORT_SYMBOL_GPL(trace_vprintk);
3481
3482 static void trace_iterator_increment(struct trace_iterator *iter)
3483 {
3484 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3485
3486 iter->idx++;
3487 if (buf_iter)
3488 ring_buffer_iter_advance(buf_iter);
3489 }
3490
3491 static struct trace_entry *
3492 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3493 unsigned long *lost_events)
3494 {
3495 struct ring_buffer_event *event;
3496 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3497
3498 if (buf_iter) {
3499 event = ring_buffer_iter_peek(buf_iter, ts);
3500 if (lost_events)
3501 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3502 (unsigned long)-1 : 0;
3503 } else {
3504 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3505 lost_events);
3506 }
3507
3508 if (event) {
3509 iter->ent_size = ring_buffer_event_length(event);
3510 return ring_buffer_event_data(event);
3511 }
3512 iter->ent_size = 0;
3513 return NULL;
3514 }
3515
3516 static struct trace_entry *
3517 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3518 unsigned long *missing_events, u64 *ent_ts)
3519 {
3520 struct trace_buffer *buffer = iter->array_buffer->buffer;
3521 struct trace_entry *ent, *next = NULL;
3522 unsigned long lost_events = 0, next_lost = 0;
3523 int cpu_file = iter->cpu_file;
3524 u64 next_ts = 0, ts;
3525 int next_cpu = -1;
3526 int next_size = 0;
3527 int cpu;
3528
3529 /*
3530 * If we are in a per_cpu trace file, don't bother iterating over
3531 * all CPUs; just peek at that CPU directly.
3532 */
3533 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3534 if (ring_buffer_empty_cpu(buffer, cpu_file))
3535 return NULL;
3536 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3537 if (ent_cpu)
3538 *ent_cpu = cpu_file;
3539
3540 return ent;
3541 }
3542
3543 for_each_tracing_cpu(cpu) {
3544
3545 if (ring_buffer_empty_cpu(buffer, cpu))
3546 continue;
3547
3548 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3549
3550 /*
3551 * Pick the entry with the smallest timestamp:
3552 */
3553 if (ent && (!next || ts < next_ts)) {
3554 next = ent;
3555 next_cpu = cpu;
3556 next_ts = ts;
3557 next_lost = lost_events;
3558 next_size = iter->ent_size;
3559 }
3560 }
3561
3562 iter->ent_size = next_size;
3563
3564 if (ent_cpu)
3565 *ent_cpu = next_cpu;
3566
3567 if (ent_ts)
3568 *ent_ts = next_ts;
3569
3570 if (missing_events)
3571 *missing_events = next_lost;
3572
3573 return next;
3574 }
3575
3576 #define STATIC_TEMP_BUF_SIZE 128
3577 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3578
3579 /* Find the next real entry, without updating the iterator itself */
3580 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3581 int *ent_cpu, u64 *ent_ts)
3582 {
3583 /* __find_next_entry will reset ent_size */
3584 int ent_size = iter->ent_size;
3585 struct trace_entry *entry;
3586
3587 /*
3588 * If called from ftrace_dump(), then the iter->temp buffer
3589 * will be the static_temp_buf and not created from kmalloc.
3590 * If the entry size is greater than the buffer, we can
3591 * not save it. Just return NULL in that case. This is only
3592 * used to add markers when two consecutive events' time
3593 * stamps have a large delta. See trace_print_lat_context()
3594 */
3595 if (iter->temp == static_temp_buf &&
3596 STATIC_TEMP_BUF_SIZE < ent_size)
3597 return NULL;
3598
3599 /*
3600 * The __find_next_entry() may call peek_next_entry(), which may
3601 * call ring_buffer_peek() that may make the contents of iter->ent
3602 * undefined. Need to copy iter->ent now.
3603 */
3604 if (iter->ent && iter->ent != iter->temp) {
3605 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3606 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3607 void *temp;
3608 temp = kmalloc(iter->ent_size, GFP_KERNEL);
3609 if (!temp)
3610 return NULL;
3611 kfree(iter->temp);
3612 iter->temp = temp;
3613 iter->temp_size = iter->ent_size;
3614 }
3615 memcpy(iter->temp, iter->ent, iter->ent_size);
3616 iter->ent = iter->temp;
3617 }
3618 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3619 /* Put back the original ent_size */
3620 iter->ent_size = ent_size;
3621
3622 return entry;
3623 }
3624
3625 /* Find the next real entry, and increment the iterator to the next entry */
3626 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3627 {
3628 iter->ent = __find_next_entry(iter, &iter->cpu,
3629 &iter->lost_events, &iter->ts);
3630
3631 if (iter->ent)
3632 trace_iterator_increment(iter);
3633
3634 return iter->ent ? iter : NULL;
3635 }
3636
3637 static void trace_consume(struct trace_iterator *iter)
3638 {
3639 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3640 &iter->lost_events);
3641 }
3642
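/*
 * seq_file ->next() callback for the "trace" file: advance the trace
 * iterator to the entry at position *pos using trace_find_next_entry_inc().
 * The iterator itself doubles as the seq_file position cookie; NULL is
 * returned when there are no more entries.
 */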
3643 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3644 {
3645 struct trace_iterator *iter = m->private;
3646 int i = (int)*pos;
3647 void *ent;
3648
3649 WARN_ON_ONCE(iter->leftover);
3650
3651 (*pos)++;
3652
3653 /* can't go backwards */
3654 if (iter->idx > i)
3655 return NULL;
3656
3657 if (iter->idx < 0)
3658 ent = trace_find_next_entry_inc(iter);
3659 else
3660 ent = iter;
3661
3662 while (ent && iter->idx < i)
3663 ent = trace_find_next_entry_inc(iter);
3664
3665 iter->pos = *pos;
3666
3667 return ent;
3668 }
3669
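/*
 * Reset the ring buffer iterator for @cpu and count any entries stamped
 * before array_buffer->time_start as "skipped", so they are excluded from
 * the entries-in-buffer statistics.
 */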
3670 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3671 {
3672 struct ring_buffer_iter *buf_iter;
3673 unsigned long entries = 0;
3674 u64 ts;
3675
3676 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3677
3678 buf_iter = trace_buffer_iter(iter, cpu);
3679 if (!buf_iter)
3680 return;
3681
3682 ring_buffer_iter_reset(buf_iter);
3683
3684 /*
3685 * We could have the case with the max latency tracers
3686 * that a reset never took place on a CPU. This is evidenced
3687 * by the timestamp being before the start of the buffer.
3688 */
3689 while (ring_buffer_iter_peek(buf_iter, &ts)) {
3690 if (ts >= iter->array_buffer->time_start)
3691 break;
3692 entries++;
3693 ring_buffer_iter_advance(buf_iter);
3694 }
3695
3696 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3697 }
3698
3699 /*
3700 * The current tracer is copied to avoid taking a global
3701 * lock all around.
3702 */
3703 static void *s_start(struct seq_file *m, loff_t *pos)
3704 {
3705 struct trace_iterator *iter = m->private;
3706 struct trace_array *tr = iter->tr;
3707 int cpu_file = iter->cpu_file;
3708 void *p = NULL;
3709 loff_t l = 0;
3710 int cpu;
3711
3712 /*
3713 * Copy the tracer to avoid using a global lock all around.
3714 * iter->trace is a copy of current_trace; the name pointer
3715 * can be compared instead of using strcmp(), as iter->trace->name
3716 * will point to the same string as current_trace->name.
3717 */
3718 mutex_lock(&trace_types_lock);
3719 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3720 *iter->trace = *tr->current_trace;
3721 mutex_unlock(&trace_types_lock);
3722
3723 #ifdef CONFIG_TRACER_MAX_TRACE
3724 if (iter->snapshot && iter->trace->use_max_tr)
3725 return ERR_PTR(-EBUSY);
3726 #endif
3727
3728 if (*pos != iter->pos) {
3729 iter->ent = NULL;
3730 iter->cpu = 0;
3731 iter->idx = -1;
3732
3733 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3734 for_each_tracing_cpu(cpu)
3735 tracing_iter_reset(iter, cpu);
3736 } else
3737 tracing_iter_reset(iter, cpu_file);
3738
3739 iter->leftover = 0;
3740 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3741 ;
3742
3743 } else {
3744 /*
3745 * If we overflowed the seq_file before, then we want
3746 * to just reuse the trace_seq buffer again.
3747 */
3748 if (iter->leftover)
3749 p = iter;
3750 else {
3751 l = *pos - 1;
3752 p = s_next(m, p, &l);
3753 }
3754 }
3755
3756 trace_event_read_lock();
3757 trace_access_lock(cpu_file);
3758 return p;
3759 }
3760
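/* seq_file ->stop() callback: release the locks taken in s_start(). */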
3761 static void s_stop(struct seq_file *m, void *p)
3762 {
3763 struct trace_iterator *iter = m->private;
3764
3765 #ifdef CONFIG_TRACER_MAX_TRACE
3766 if (iter->snapshot && iter->trace->use_max_tr)
3767 return;
3768 #endif
3769
3770 trace_access_unlock(iter->cpu_file);
3771 trace_event_read_unlock();
3772 }
3773
3774 static void
3775 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3776 unsigned long *entries, int cpu)
3777 {
3778 unsigned long count;
3779
3780 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3781 /*
3782 * If this buffer has skipped entries, then we hold all
3783 * entries for the trace and we need to ignore the
3784 * ones before the time stamp.
3785 */
3786 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3787 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3788 /* total is the same as the entries */
3789 *total = count;
3790 } else
3791 *total = count +
3792 ring_buffer_overrun_cpu(buf->buffer, cpu);
3793 *entries = count;
3794 }
3795
3796 static void
3797 get_total_entries(struct array_buffer *buf,
3798 unsigned long *total, unsigned long *entries)
3799 {
3800 unsigned long t, e;
3801 int cpu;
3802
3803 *total = 0;
3804 *entries = 0;
3805
3806 for_each_tracing_cpu(cpu) {
3807 get_total_entries_cpu(buf, &t, &e, cpu);
3808 *total += t;
3809 *entries += e;
3810 }
3811 }
3812
3813 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3814 {
3815 unsigned long total, entries;
3816
3817 if (!tr)
3818 tr = &global_trace;
3819
3820 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3821
3822 return entries;
3823 }
3824
3825 unsigned long trace_total_entries(struct trace_array *tr)
3826 {
3827 unsigned long total, entries;
3828
3829 if (!tr)
3830 tr = &global_trace;
3831
3832 get_total_entries(&tr->array_buffer, &total, &entries);
3833
3834 return entries;
3835 }
3836
3837 static void print_lat_help_header(struct seq_file *m)
3838 {
3839 seq_puts(m, "# _------=> CPU# \n"
3840 "# / _-----=> irqs-off \n"
3841 "# | / _----=> need-resched \n"
3842 "# || / _---=> hardirq/softirq \n"
3843 "# ||| / _--=> preempt-depth \n"
3844 "# |||| / delay \n"
3845 "# cmd pid ||||| time | caller \n"
3846 "# \\ / ||||| \\ | / \n");
3847 }
3848
3849 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3850 {
3851 unsigned long total;
3852 unsigned long entries;
3853
3854 get_total_entries(buf, &total, &entries);
3855 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
3856 entries, total, num_online_cpus());
3857 seq_puts(m, "#\n");
3858 }
3859
3860 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3861 unsigned int flags)
3862 {
3863 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3864
3865 print_event_info(buf, m);
3866
3867 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
3868 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
3869 }
3870
3871 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3872 unsigned int flags)
3873 {
3874 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3875 const char *space = " ";
3876 int prec = tgid ? 12 : 2;
3877
3878 print_event_info(buf, m);
3879
3880 seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
3881 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
3882 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
3883 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
3884 seq_printf(m, "# %.*s||| / delay\n", prec, space);
3885 seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID ");
3886 seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | ");
3887 }
3888
3889 void
3890 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3891 {
3892 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3893 struct array_buffer *buf = iter->array_buffer;
3894 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3895 struct tracer *type = iter->trace;
3896 unsigned long entries;
3897 unsigned long total;
3898 const char *name = "preemption";
3899
3900 name = type->name;
3901
3902 get_total_entries(buf, &total, &entries);
3903
3904 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3905 name, UTS_RELEASE);
3906 seq_puts(m, "# -----------------------------------"
3907 "---------------------------------\n");
3908 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3909 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3910 nsecs_to_usecs(data->saved_latency),
3911 entries,
3912 total,
3913 buf->cpu,
3914 #if defined(CONFIG_PREEMPT_NONE)
3915 "server",
3916 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3917 "desktop",
3918 #elif defined(CONFIG_PREEMPT)
3919 "preempt",
3920 #elif defined(CONFIG_PREEMPT_RT)
3921 "preempt_rt",
3922 #else
3923 "unknown",
3924 #endif
3925 /* These are reserved for later use */
3926 0, 0, 0, 0);
3927 #ifdef CONFIG_SMP
3928 seq_printf(m, " #P:%d)\n", num_online_cpus());
3929 #else
3930 seq_puts(m, ")\n");
3931 #endif
3932 seq_puts(m, "# -----------------\n");
3933 seq_printf(m, "# | task: %.16s-%d "
3934 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3935 data->comm, data->pid,
3936 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3937 data->policy, data->rt_priority);
3938 seq_puts(m, "# -----------------\n");
3939
3940 if (data->critical_start) {
3941 seq_puts(m, "# => started at: ");
3942 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3943 trace_print_seq(m, &iter->seq);
3944 seq_puts(m, "\n# => ended at: ");
3945 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3946 trace_print_seq(m, &iter->seq);
3947 seq_puts(m, "\n#\n");
3948 }
3949
3950 seq_puts(m, "#\n");
3951 }
3952
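/*
 * With the "annotate" option set and overruns recorded, print a
 * "CPU %u buffer started" marker the first time output switches to a CPU
 * whose buffer does not reach back to the beginning of the trace.
 */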
3953 static void test_cpu_buff_start(struct trace_iterator *iter)
3954 {
3955 struct trace_seq *s = &iter->seq;
3956 struct trace_array *tr = iter->tr;
3957
3958 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3959 return;
3960
3961 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3962 return;
3963
3964 if (cpumask_available(iter->started) &&
3965 cpumask_test_cpu(iter->cpu, iter->started))
3966 return;
3967
3968 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3969 return;
3970
3971 if (cpumask_available(iter->started))
3972 cpumask_set_cpu(iter->cpu, iter->started);
3973
3974 /* Don't print started cpu buffer for the first entry of the trace */
3975 if (iter->idx > 1)
3976 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3977 iter->cpu);
3978 }
3979
3980 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3981 {
3982 struct trace_array *tr = iter->tr;
3983 struct trace_seq *s = &iter->seq;
3984 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3985 struct trace_entry *entry;
3986 struct trace_event *event;
3987
3988 entry = iter->ent;
3989
3990 test_cpu_buff_start(iter);
3991
3992 event = ftrace_find_event(entry->type);
3993
3994 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3995 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3996 trace_print_lat_context(iter);
3997 else
3998 trace_print_context(iter);
3999 }
4000
4001 if (trace_seq_has_overflowed(s))
4002 return TRACE_TYPE_PARTIAL_LINE;
4003
4004 if (event)
4005 return event->funcs->trace(iter, sym_flags, event);
4006
4007 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4008
4009 return trace_handle_return(s);
4010 }
4011
4012 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4013 {
4014 struct trace_array *tr = iter->tr;
4015 struct trace_seq *s = &iter->seq;
4016 struct trace_entry *entry;
4017 struct trace_event *event;
4018
4019 entry = iter->ent;
4020
4021 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4022 trace_seq_printf(s, "%d %d %llu ",
4023 entry->pid, iter->cpu, iter->ts);
4024
4025 if (trace_seq_has_overflowed(s))
4026 return TRACE_TYPE_PARTIAL_LINE;
4027
4028 event = ftrace_find_event(entry->type);
4029 if (event)
4030 return event->funcs->raw(iter, 0, event);
4031
4032 trace_seq_printf(s, "%d ?\n", entry->type);
4033
4034 return trace_handle_return(s);
4035 }
4036
4037 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4038 {
4039 struct trace_array *tr = iter->tr;
4040 struct trace_seq *s = &iter->seq;
4041 unsigned char newline = '\n';
4042 struct trace_entry *entry;
4043 struct trace_event *event;
4044
4045 entry = iter->ent;
4046
4047 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4048 SEQ_PUT_HEX_FIELD(s, entry->pid);
4049 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4050 SEQ_PUT_HEX_FIELD(s, iter->ts);
4051 if (trace_seq_has_overflowed(s))
4052 return TRACE_TYPE_PARTIAL_LINE;
4053 }
4054
4055 event = ftrace_find_event(entry->type);
4056 if (event) {
4057 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4058 if (ret != TRACE_TYPE_HANDLED)
4059 return ret;
4060 }
4061
4062 SEQ_PUT_FIELD(s, newline);
4063
4064 return trace_handle_return(s);
4065 }
4066
4067 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4068 {
4069 struct trace_array *tr = iter->tr;
4070 struct trace_seq *s = &iter->seq;
4071 struct trace_entry *entry;
4072 struct trace_event *event;
4073
4074 entry = iter->ent;
4075
4076 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4077 SEQ_PUT_FIELD(s, entry->pid);
4078 SEQ_PUT_FIELD(s, iter->cpu);
4079 SEQ_PUT_FIELD(s, iter->ts);
4080 if (trace_seq_has_overflowed(s))
4081 return TRACE_TYPE_PARTIAL_LINE;
4082 }
4083
4084 event = ftrace_find_event(entry->type);
4085 return event ? event->funcs->binary(iter, 0, event) :
4086 TRACE_TYPE_HANDLED;
4087 }
4088
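/*
 * Return 1 if there is nothing left to read for this iterator, 0 otherwise.
 * Uses the per-CPU buffer iterators when they exist, otherwise queries the
 * ring buffer directly.
 */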
4089 int trace_empty(struct trace_iterator *iter)
4090 {
4091 struct ring_buffer_iter *buf_iter;
4092 int cpu;
4093
4094 /* If we are looking at one CPU buffer, only check that one */
4095 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4096 cpu = iter->cpu_file;
4097 buf_iter = trace_buffer_iter(iter, cpu);
4098 if (buf_iter) {
4099 if (!ring_buffer_iter_empty(buf_iter))
4100 return 0;
4101 } else {
4102 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4103 return 0;
4104 }
4105 return 1;
4106 }
4107
4108 for_each_tracing_cpu(cpu) {
4109 buf_iter = trace_buffer_iter(iter, cpu);
4110 if (buf_iter) {
4111 if (!ring_buffer_iter_empty(buf_iter))
4112 return 0;
4113 } else {
4114 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4115 return 0;
4116 }
4117 }
4118
4119 return 1;
4120 }
4121
4122 /* Called with trace_event_read_lock() held. */
4123 enum print_line_t print_trace_line(struct trace_iterator *iter)
4124 {
4125 struct trace_array *tr = iter->tr;
4126 unsigned long trace_flags = tr->trace_flags;
4127 enum print_line_t ret;
4128
4129 if (iter->lost_events) {
4130 if (iter->lost_events == (unsigned long)-1)
4131 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4132 iter->cpu);
4133 else
4134 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4135 iter->cpu, iter->lost_events);
4136 if (trace_seq_has_overflowed(&iter->seq))
4137 return TRACE_TYPE_PARTIAL_LINE;
4138 }
4139
4140 if (iter->trace && iter->trace->print_line) {
4141 ret = iter->trace->print_line(iter);
4142 if (ret != TRACE_TYPE_UNHANDLED)
4143 return ret;
4144 }
4145
4146 if (iter->ent->type == TRACE_BPUTS &&
4147 trace_flags & TRACE_ITER_PRINTK &&
4148 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4149 return trace_print_bputs_msg_only(iter);
4150
4151 if (iter->ent->type == TRACE_BPRINT &&
4152 trace_flags & TRACE_ITER_PRINTK &&
4153 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4154 return trace_print_bprintk_msg_only(iter);
4155
4156 if (iter->ent->type == TRACE_PRINT &&
4157 trace_flags & TRACE_ITER_PRINTK &&
4158 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4159 return trace_print_printk_msg_only(iter);
4160
4161 if (trace_flags & TRACE_ITER_BIN)
4162 return print_bin_fmt(iter);
4163
4164 if (trace_flags & TRACE_ITER_HEX)
4165 return print_hex_fmt(iter);
4166
4167 if (trace_flags & TRACE_ITER_RAW)
4168 return print_raw_fmt(iter);
4169
4170 return print_trace_fmt(iter);
4171 }
4172
4173 void trace_latency_header(struct seq_file *m)
4174 {
4175 struct trace_iterator *iter = m->private;
4176 struct trace_array *tr = iter->tr;
4177
4178 /* print nothing if the buffers are empty */
4179 if (trace_empty(iter))
4180 return;
4181
4182 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4183 print_trace_header(m, iter);
4184
4185 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4186 print_lat_help_header(m);
4187 }
4188
4189 void trace_default_header(struct seq_file *m)
4190 {
4191 struct trace_iterator *iter = m->private;
4192 struct trace_array *tr = iter->tr;
4193 unsigned long trace_flags = tr->trace_flags;
4194
4195 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4196 return;
4197
4198 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4199 /* print nothing if the buffers are empty */
4200 if (trace_empty(iter))
4201 return;
4202 print_trace_header(m, iter);
4203 if (!(trace_flags & TRACE_ITER_VERBOSE))
4204 print_lat_help_header(m);
4205 } else {
4206 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4207 if (trace_flags & TRACE_ITER_IRQ_INFO)
4208 print_func_help_header_irq(iter->array_buffer,
4209 m, trace_flags);
4210 else
4211 print_func_help_header(iter->array_buffer, m,
4212 trace_flags);
4213 }
4214 }
4215 }
4216
4217 static void test_ftrace_alive(struct seq_file *m)
4218 {
4219 if (!ftrace_is_dead())
4220 return;
4221 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4222 "# MAY BE MISSING FUNCTION EVENTS\n");
4223 }
4224
4225 #ifdef CONFIG_TRACER_MAX_TRACE
4226 static void show_snapshot_main_help(struct seq_file *m)
4227 {
4228 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4229 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4230 "# Takes a snapshot of the main buffer.\n"
4231 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4232 "# (Doesn't have to be '2', works with any number that\n"
4233 "# is not a '0' or '1')\n");
4234 }
4235
4236 static void show_snapshot_percpu_help(struct seq_file *m)
4237 {
4238 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4239 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4240 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4241 "# Takes a snapshot of the main buffer for this cpu.\n");
4242 #else
4243 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4244 "# Must use main snapshot file to allocate.\n");
4245 #endif
4246 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4247 "# (Doesn't have to be '2', works with any number that\n"
4248 "# is not a '0' or '1')\n");
4249 }
4250
4251 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4252 {
4253 if (iter->tr->allocated_snapshot)
4254 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4255 else
4256 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4257
4258 seq_puts(m, "# Snapshot commands:\n");
4259 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4260 show_snapshot_main_help(m);
4261 else
4262 show_snapshot_percpu_help(m);
4263 }
4264 #else
4265 /* Should never be called */
4266 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4267 #endif
4268
4269 static int s_show(struct seq_file *m, void *v)
4270 {
4271 struct trace_iterator *iter = v;
4272 int ret;
4273
4274 if (iter->ent == NULL) {
4275 if (iter->tr) {
4276 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4277 seq_puts(m, "#\n");
4278 test_ftrace_alive(m);
4279 }
4280 if (iter->snapshot && trace_empty(iter))
4281 print_snapshot_help(m, iter);
4282 else if (iter->trace && iter->trace->print_header)
4283 iter->trace->print_header(m);
4284 else
4285 trace_default_header(m);
4286
4287 } else if (iter->leftover) {
4288 /*
4289 * If we filled the seq_file buffer earlier, we
4290 * want to just show it now.
4291 */
4292 ret = trace_print_seq(m, &iter->seq);
4293
4294 /* ret should this time be zero, but you never know */
4295 iter->leftover = ret;
4296
4297 } else {
4298 print_trace_line(iter);
4299 ret = trace_print_seq(m, &iter->seq);
4300 /*
4301 * If we overflow the seq_file buffer, then it will
4302 * ask us for this data again at start up.
4303 * Use that instead.
4304 * ret is 0 if seq_file write succeeded.
4305 * -1 otherwise.
4306 */
4307 iter->leftover = ret;
4308 }
4309
4310 return 0;
4311 }
4312
4313 /*
4314 * Should be used after trace_array_get(), trace_types_lock
4315 * ensures that i_cdev was already initialized.
4316 */
4317 static inline int tracing_get_cpu(struct inode *inode)
4318 {
4319 if (inode->i_cdev) /* See trace_create_cpu_file() */
4320 return (long)inode->i_cdev - 1;
4321 return RING_BUFFER_ALL_CPUS;
4322 }
4323
4324 static const struct seq_operations tracer_seq_ops = {
4325 .start = s_start,
4326 .next = s_next,
4327 .stop = s_stop,
4328 .show = s_show,
4329 };
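/*
 * Note: the seq_file core drives the callbacks above as
 * s_start() -> s_show()/s_next() ... -> s_stop(), and calls s_start()
 * again with the last position whenever its output buffer fills up.
 */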
4330
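/*
 * Build a trace_iterator for reading the "trace" (or "snapshot") file:
 * allocate per-CPU ring buffer iterators, copy the current tracer, and
 * optionally pause tracing while the file is held open.
 */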
4331 static struct trace_iterator *
4332 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4333 {
4334 struct trace_array *tr = inode->i_private;
4335 struct trace_iterator *iter;
4336 int cpu;
4337
4338 if (tracing_disabled)
4339 return ERR_PTR(-ENODEV);
4340
4341 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4342 if (!iter)
4343 return ERR_PTR(-ENOMEM);
4344
4345 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4346 GFP_KERNEL);
4347 if (!iter->buffer_iter)
4348 goto release;
4349
4350 /*
4351 * trace_find_next_entry() may need to save off iter->ent.
4352 * It will place it into the iter->temp buffer. As most
4353 * events are smaller than 128 bytes, allocate a buffer of that size.
4354 * If one is greater, then trace_find_next_entry() will
4355 * allocate a new buffer to adjust for the bigger iter->ent.
4356 * It's not critical if it fails to get allocated here.
4357 */
4358 iter->temp = kmalloc(128, GFP_KERNEL);
4359 if (iter->temp)
4360 iter->temp_size = 128;
4361
4362 /*
4363 * We make a copy of the current tracer to avoid concurrent
4364 * changes on it while we are reading.
4365 */
4366 mutex_lock(&trace_types_lock);
4367 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4368 if (!iter->trace)
4369 goto fail;
4370
4371 *iter->trace = *tr->current_trace;
4372
4373 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4374 goto fail;
4375
4376 iter->tr = tr;
4377
4378 #ifdef CONFIG_TRACER_MAX_TRACE
4379 /* Currently only the top directory has a snapshot */
4380 if (tr->current_trace->print_max || snapshot)
4381 iter->array_buffer = &tr->max_buffer;
4382 else
4383 #endif
4384 iter->array_buffer = &tr->array_buffer;
4385 iter->snapshot = snapshot;
4386 iter->pos = -1;
4387 iter->cpu_file = tracing_get_cpu(inode);
4388 mutex_init(&iter->mutex);
4389
4390 /* Notify the tracer early; before we stop tracing. */
4391 if (iter->trace->open)
4392 iter->trace->open(iter);
4393
4394 /* Annotate start of buffers if we had overruns */
4395 if (ring_buffer_overruns(iter->array_buffer->buffer))
4396 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4397
4398 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4399 if (trace_clocks[tr->clock_id].in_ns)
4400 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4401
4402 /*
4403 * If pause-on-trace is enabled, then stop the trace while
4404 * dumping, unless this is the "snapshot" file
4405 */
4406 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4407 tracing_stop_tr(tr);
4408
4409 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4410 for_each_tracing_cpu(cpu) {
4411 iter->buffer_iter[cpu] =
4412 ring_buffer_read_prepare(iter->array_buffer->buffer,
4413 cpu, GFP_KERNEL);
4414 }
4415 ring_buffer_read_prepare_sync();
4416 for_each_tracing_cpu(cpu) {
4417 ring_buffer_read_start(iter->buffer_iter[cpu]);
4418 tracing_iter_reset(iter, cpu);
4419 }
4420 } else {
4421 cpu = iter->cpu_file;
4422 iter->buffer_iter[cpu] =
4423 ring_buffer_read_prepare(iter->array_buffer->buffer,
4424 cpu, GFP_KERNEL);
4425 ring_buffer_read_prepare_sync();
4426 ring_buffer_read_start(iter->buffer_iter[cpu]);
4427 tracing_iter_reset(iter, cpu);
4428 }
4429
4430 mutex_unlock(&trace_types_lock);
4431
4432 return iter;
4433
4434 fail:
4435 mutex_unlock(&trace_types_lock);
4436 kfree(iter->trace);
4437 kfree(iter->temp);
4438 kfree(iter->buffer_iter);
4439 release:
4440 seq_release_private(inode, file);
4441 return ERR_PTR(-ENOMEM);
4442 }
4443
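/*
 * Generic open that only checks whether tracing is usable and stashes the
 * inode's private data for the file's read/write handlers.
 */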
4444 int tracing_open_generic(struct inode *inode, struct file *filp)
4445 {
4446 int ret;
4447
4448 ret = tracing_check_open_get_tr(NULL);
4449 if (ret)
4450 return ret;
4451
4452 filp->private_data = inode->i_private;
4453 return 0;
4454 }
4455
4456 bool tracing_is_disabled(void)
4457 {
4458 return (tracing_disabled) ? true : false;
4459 }
4460
4461 /*
4462 * Open and update trace_array ref count.
4463 * Must have the current trace_array passed to it.
4464 */
4465 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4466 {
4467 struct trace_array *tr = inode->i_private;
4468 int ret;
4469
4470 ret = tracing_check_open_get_tr(tr);
4471 if (ret)
4472 return ret;
4473
4474 filp->private_data = inode->i_private;
4475
4476 return 0;
4477 }
4478
4479 static int tracing_release(struct inode *inode, struct file *file)
4480 {
4481 struct trace_array *tr = inode->i_private;
4482 struct seq_file *m = file->private_data;
4483 struct trace_iterator *iter;
4484 int cpu;
4485
4486 if (!(file->f_mode & FMODE_READ)) {
4487 trace_array_put(tr);
4488 return 0;
4489 }
4490
4491 /* Writes do not use seq_file */
4492 iter = m->private;
4493 mutex_lock(&trace_types_lock);
4494
4495 for_each_tracing_cpu(cpu) {
4496 if (iter->buffer_iter[cpu])
4497 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4498 }
4499
4500 if (iter->trace && iter->trace->close)
4501 iter->trace->close(iter);
4502
4503 if (!iter->snapshot && tr->stop_count)
4504 /* reenable tracing if it was previously enabled */
4505 tracing_start_tr(tr);
4506
4507 __trace_array_put(tr);
4508
4509 mutex_unlock(&trace_types_lock);
4510
4511 mutex_destroy(&iter->mutex);
4512 free_cpumask_var(iter->started);
4513 kfree(iter->temp);
4514 kfree(iter->trace);
4515 kfree(iter->buffer_iter);
4516 seq_release_private(inode, file);
4517
4518 return 0;
4519 }
4520
4521 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4522 {
4523 struct trace_array *tr = inode->i_private;
4524
4525 trace_array_put(tr);
4526 return 0;
4527 }
4528
4529 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4530 {
4531 struct trace_array *tr = inode->i_private;
4532
4533 trace_array_put(tr);
4534
4535 return single_release(inode, file);
4536 }
4537
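/*
 * Open handler for the "trace" file. Opening with O_TRUNC for write clears
 * the buffer; opening for read builds a full iterator via __tracing_open().
 */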
4538 static int tracing_open(struct inode *inode, struct file *file)
4539 {
4540 struct trace_array *tr = inode->i_private;
4541 struct trace_iterator *iter;
4542 int ret;
4543
4544 ret = tracing_check_open_get_tr(tr);
4545 if (ret)
4546 return ret;
4547
4548 /* If this file was open for write, then erase contents */
4549 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4550 int cpu = tracing_get_cpu(inode);
4551 struct array_buffer *trace_buf = &tr->array_buffer;
4552
4553 #ifdef CONFIG_TRACER_MAX_TRACE
4554 if (tr->current_trace->print_max)
4555 trace_buf = &tr->max_buffer;
4556 #endif
4557
4558 if (cpu == RING_BUFFER_ALL_CPUS)
4559 tracing_reset_online_cpus(trace_buf);
4560 else
4561 tracing_reset_cpu(trace_buf, cpu);
4562 }
4563
4564 if (file->f_mode & FMODE_READ) {
4565 iter = __tracing_open(inode, file, false);
4566 if (IS_ERR(iter))
4567 ret = PTR_ERR(iter);
4568 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4569 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4570 }
4571
4572 if (ret < 0)
4573 trace_array_put(tr);
4574
4575 return ret;
4576 }
4577
4578 /*
4579 * Some tracers are not suitable for instance buffers.
4580 * A tracer is always available for the global array (toplevel)
4581 * or if it explicitly states that it is.
4582 */
4583 static bool
4584 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4585 {
4586 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4587 }
4588
4589 /* Find the next tracer that this trace array may use */
4590 static struct tracer *
4591 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4592 {
4593 while (t && !trace_ok_for_array(t, tr))
4594 t = t->next;
4595
4596 return t;
4597 }
4598
4599 static void *
4600 t_next(struct seq_file *m, void *v, loff_t *pos)
4601 {
4602 struct trace_array *tr = m->private;
4603 struct tracer *t = v;
4604
4605 (*pos)++;
4606
4607 if (t)
4608 t = get_tracer_for_array(tr, t->next);
4609
4610 return t;
4611 }
4612
4613 static void *t_start(struct seq_file *m, loff_t *pos)
4614 {
4615 struct trace_array *tr = m->private;
4616 struct tracer *t;
4617 loff_t l = 0;
4618
4619 mutex_lock(&trace_types_lock);
4620
4621 t = get_tracer_for_array(tr, trace_types);
4622 for (; t && l < *pos; t = t_next(m, t, &l))
4623 ;
4624
4625 return t;
4626 }
4627
4628 static void t_stop(struct seq_file *m, void *p)
4629 {
4630 mutex_unlock(&trace_types_lock);
4631 }
4632
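/*
 * Print one tracer name; names are separated by spaces so that
 * available_tracers ends up as a single space-separated line.
 */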
4633 static int t_show(struct seq_file *m, void *v)
4634 {
4635 struct tracer *t = v;
4636
4637 if (!t)
4638 return 0;
4639
4640 seq_puts(m, t->name);
4641 if (t->next)
4642 seq_putc(m, ' ');
4643 else
4644 seq_putc(m, '\n');
4645
4646 return 0;
4647 }
4648
4649 static const struct seq_operations show_traces_seq_ops = {
4650 .start = t_start,
4651 .next = t_next,
4652 .stop = t_stop,
4653 .show = t_show,
4654 };
4655
4656 static int show_traces_open(struct inode *inode, struct file *file)
4657 {
4658 struct trace_array *tr = inode->i_private;
4659 struct seq_file *m;
4660 int ret;
4661
4662 ret = tracing_check_open_get_tr(tr);
4663 if (ret)
4664 return ret;
4665
4666 ret = seq_open(file, &show_traces_seq_ops);
4667 if (ret) {
4668 trace_array_put(tr);
4669 return ret;
4670 }
4671
4672 m = file->private_data;
4673 m->private = tr;
4674
4675 return 0;
4676 }
4677
4678 static int show_traces_release(struct inode *inode, struct file *file)
4679 {
4680 struct trace_array *tr = inode->i_private;
4681
4682 trace_array_put(tr);
4683 return seq_release(inode, file);
4684 }
4685
4686 static ssize_t
4687 tracing_write_stub(struct file *filp, const char __user *ubuf,
4688 size_t count, loff_t *ppos)
4689 {
4690 return count;
4691 }
4692
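/*
 * lseek is only meaningful for readers, which go through seq_file;
 * writers simply get their file position reset to zero.
 */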
4693 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4694 {
4695 int ret;
4696
4697 if (file->f_mode & FMODE_READ)
4698 ret = seq_lseek(file, offset, whence);
4699 else
4700 file->f_pos = ret = 0;
4701
4702 return ret;
4703 }
4704
4705 static const struct file_operations tracing_fops = {
4706 .open = tracing_open,
4707 .read = seq_read,
4708 .write = tracing_write_stub,
4709 .llseek = tracing_lseek,
4710 .release = tracing_release,
4711 };
4712
4713 static const struct file_operations show_traces_fops = {
4714 .open = show_traces_open,
4715 .read = seq_read,
4716 .llseek = seq_lseek,
4717 .release = show_traces_release,
4718 };
4719
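/* Read handler for "tracing_cpumask": emit the current mask in %*pb bitmap form. */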
4720 static ssize_t
4721 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4722 size_t count, loff_t *ppos)
4723 {
4724 struct trace_array *tr = file_inode(filp)->i_private;
4725 char *mask_str;
4726 int len;
4727
4728 len = snprintf(NULL, 0, "%*pb\n",
4729 cpumask_pr_args(tr->tracing_cpumask)) + 1;
4730 mask_str = kmalloc(len, GFP_KERNEL);
4731 if (!mask_str)
4732 return -ENOMEM;
4733
4734 len = snprintf(mask_str, len, "%*pb\n",
4735 cpumask_pr_args(tr->tracing_cpumask));
4736 if (len >= count) {
4737 count = -EINVAL;
4738 goto out_err;
4739 }
4740 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4741
4742 out_err:
4743 kfree(mask_str);
4744
4745 return count;
4746 }
4747
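/*
 * Apply a new tracing cpumask to @tr: CPUs leaving the mask have their
 * per-CPU buffers disabled and CPUs entering it are re-enabled, with
 * tr->max_lock held across the update.
 */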
4748 int tracing_set_cpumask(struct trace_array *tr,
4749 cpumask_var_t tracing_cpumask_new)
4750 {
4751 int cpu;
4752
4753 if (!tr)
4754 return -EINVAL;
4755
4756 local_irq_disable();
4757 arch_spin_lock(&tr->max_lock);
4758 for_each_tracing_cpu(cpu) {
4759 /*
4760 * Increase/decrease the disabled counter if we are
4761 * about to flip a bit in the cpumask:
4762 */
4763 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4764 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4765 atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4766 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4767 }
4768 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4769 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4770 atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4771 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4772 }
4773 }
4774 arch_spin_unlock(&tr->max_lock);
4775 local_irq_enable();
4776
4777 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4778
4779 return 0;
4780 }
4781
4782 static ssize_t
4783 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4784 size_t count, loff_t *ppos)
4785 {
4786 struct trace_array *tr = file_inode(filp)->i_private;
4787 cpumask_var_t tracing_cpumask_new;
4788 int err;
4789
4790 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4791 return -ENOMEM;
4792
4793 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4794 if (err)
4795 goto err_free;
4796
4797 err = tracing_set_cpumask(tr, tracing_cpumask_new);
4798 if (err)
4799 goto err_free;
4800
4801 free_cpumask_var(tracing_cpumask_new);
4802
4803 return count;
4804
4805 err_free:
4806 free_cpumask_var(tracing_cpumask_new);
4807
4808 return err;
4809 }
4810
4811 static const struct file_operations tracing_cpumask_fops = {
4812 .open = tracing_open_generic_tr,
4813 .read = tracing_cpumask_read,
4814 .write = tracing_cpumask_write,
4815 .release = tracing_release_generic_tr,
4816 .llseek = generic_file_llseek,
4817 };
4818
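/*
 * Show handler for "trace_options": list the global trace flags followed by
 * the current tracer's private options, prefixing disabled ones with "no".
 */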
4819 static int tracing_trace_options_show(struct seq_file *m, void *v)
4820 {
4821 struct tracer_opt *trace_opts;
4822 struct trace_array *tr = m->private;
4823 u32 tracer_flags;
4824 int i;
4825
4826 mutex_lock(&trace_types_lock);
4827 tracer_flags = tr->current_trace->flags->val;
4828 trace_opts = tr->current_trace->flags->opts;
4829
4830 for (i = 0; trace_options[i]; i++) {
4831 if (tr->trace_flags & (1 << i))
4832 seq_printf(m, "%s\n", trace_options[i]);
4833 else
4834 seq_printf(m, "no%s\n", trace_options[i]);
4835 }
4836
4837 for (i = 0; trace_opts[i].name; i++) {
4838 if (tracer_flags & trace_opts[i].bit)
4839 seq_printf(m, "%s\n", trace_opts[i].name);
4840 else
4841 seq_printf(m, "no%s\n", trace_opts[i].name);
4842 }
4843 mutex_unlock(&trace_types_lock);
4844
4845 return 0;
4846 }
4847
4848 static int __set_tracer_option(struct trace_array *tr,
4849 struct tracer_flags *tracer_flags,
4850 struct tracer_opt *opts, int neg)
4851 {
4852 struct tracer *trace = tracer_flags->trace;
4853 int ret;
4854
4855 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4856 if (ret)
4857 return ret;
4858
4859 if (neg)
4860 tracer_flags->val &= ~opts->bit;
4861 else
4862 tracer_flags->val |= opts->bit;
4863 return 0;
4864 }
4865
4866 /* Try to assign a tracer specific option */
4867 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4868 {
4869 struct tracer *trace = tr->current_trace;
4870 struct tracer_flags *tracer_flags = trace->flags;
4871 struct tracer_opt *opts = NULL;
4872 int i;
4873
4874 for (i = 0; tracer_flags->opts[i].name; i++) {
4875 opts = &tracer_flags->opts[i];
4876
4877 if (strcmp(cmp, opts->name) == 0)
4878 return __set_tracer_option(tr, trace->flags, opts, neg);
4879 }
4880
4881 return -EINVAL;
4882 }
4883
4884 /* Some tracers require overwrite to stay enabled */
4885 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4886 {
4887 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4888 return -1;
4889
4890 return 0;
4891 }
4892
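/*
 * Set or clear one TRACE_ITER_* flag on @tr. The current tracer may veto
 * the change, and side effects (cmdline/tgid recording, fork following,
 * ring buffer overwrite mode, trace_printk) are propagated here.
 */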
4893 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4894 {
4895 int *map;
4896
4897 if ((mask == TRACE_ITER_RECORD_TGID) ||
4898 (mask == TRACE_ITER_RECORD_CMD))
4899 lockdep_assert_held(&event_mutex);
4900
4901 /* do nothing if flag is already set */
4902 if (!!(tr->trace_flags & mask) == !!enabled)
4903 return 0;
4904
4905 /* Give the tracer a chance to approve the change */
4906 if (tr->current_trace->flag_changed)
4907 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4908 return -EINVAL;
4909
4910 if (enabled)
4911 tr->trace_flags |= mask;
4912 else
4913 tr->trace_flags &= ~mask;
4914
4915 if (mask == TRACE_ITER_RECORD_CMD)
4916 trace_event_enable_cmd_record(enabled);
4917
4918 if (mask == TRACE_ITER_RECORD_TGID) {
4919 if (!tgid_map) {
4920 tgid_map_max = pid_max;
4921 map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
4922 GFP_KERNEL);
4923
4924 /*
4925 * Pairs with smp_load_acquire() in
4926 * trace_find_tgid_ptr() to ensure that if it observes
4927 * the tgid_map we just allocated then it also observes
4928 * the corresponding tgid_map_max value.
4929 */
4930 smp_store_release(&tgid_map, map);
4931 }
4932 if (!tgid_map) {
4933 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4934 return -ENOMEM;
4935 }
4936
4937 trace_event_enable_tgid_record(enabled);
4938 }
4939
4940 if (mask == TRACE_ITER_EVENT_FORK)
4941 trace_event_follow_fork(tr, enabled);
4942
4943 if (mask == TRACE_ITER_FUNC_FORK)
4944 ftrace_pid_follow_fork(tr, enabled);
4945
4946 if (mask == TRACE_ITER_OVERWRITE) {
4947 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4948 #ifdef CONFIG_TRACER_MAX_TRACE
4949 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4950 #endif
4951 }
4952
4953 if (mask == TRACE_ITER_PRINTK) {
4954 trace_printk_start_stop_comm(enabled);
4955 trace_printk_control(enabled);
4956 }
4957
4958 return 0;
4959 }
4960
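/*
 * Parse a single option token ("opt" or "noopt"), coming from the
 * trace_options file or from boot-time setup, and apply it to @tr.
 */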
4961 int trace_set_options(struct trace_array *tr, char *option)
4962 {
4963 char *cmp;
4964 int neg = 0;
4965 int ret;
4966 size_t orig_len = strlen(option);
4967 int len;
4968
4969 cmp = strstrip(option);
4970
4971 len = str_has_prefix(cmp, "no");
4972 if (len)
4973 neg = 1;
4974
4975 cmp += len;
4976
4977 mutex_lock(&event_mutex);
4978 mutex_lock(&trace_types_lock);
4979
4980 ret = match_string(trace_options, -1, cmp);
4981 /* If no option could be set, test the specific tracer options */
4982 if (ret < 0)
4983 ret = set_tracer_option(tr, cmp, neg);
4984 else
4985 ret = set_tracer_flag(tr, 1 << ret, !neg);
4986
4987 mutex_unlock(&trace_types_lock);
4988 mutex_unlock(&event_mutex);
4989
4990 /*
4991 * If the first trailing whitespace is replaced with '\0' by strstrip,
4992 * turn it back into a space.
4993 */
4994 if (orig_len > strlen(option))
4995 option[strlen(option)] = ' ';
4996
4997 return ret;
4998 }
4999
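/*
 * Apply the boot-time trace options captured earlier from the kernel
 * command line, one comma-separated option at a time.
 */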
5000 static void __init apply_trace_boot_options(void)
5001 {
5002 char *buf = trace_boot_options_buf;
5003 char *option;
5004
5005 while (true) {
5006 option = strsep(&buf, ",");
5007
5008 if (!option)
5009 break;
5010
5011 if (*option)
5012 trace_set_options(&global_trace, option);
5013
5014 /* Put back the comma to allow this to be called again */
5015 if (buf)
5016 *(buf - 1) = ',';
5017 }
5018 }
5019
5020 static ssize_t
5021 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5022 size_t cnt, loff_t *ppos)
5023 {
5024 struct seq_file *m = filp->private_data;
5025 struct trace_array *tr = m->private;
5026 char buf[64];
5027 int ret;
5028
5029 if (cnt >= sizeof(buf))
5030 return -EINVAL;
5031
5032 if (copy_from_user(buf, ubuf, cnt))
5033 return -EFAULT;
5034
5035 buf[cnt] = 0;
5036
5037 ret = trace_set_options(tr, buf);
5038 if (ret < 0)
5039 return ret;
5040
5041 *ppos += cnt;
5042
5043 return cnt;
5044 }
5045
5046 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5047 {
5048 struct trace_array *tr = inode->i_private;
5049 int ret;
5050
5051 ret = tracing_check_open_get_tr(tr);
5052 if (ret)
5053 return ret;
5054
5055 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5056 if (ret < 0)
5057 trace_array_put(tr);
5058
5059 return ret;
5060 }
5061
5062 static const struct file_operations tracing_iter_fops = {
5063 .open = tracing_trace_options_open,
5064 .read = seq_read,
5065 .llseek = seq_lseek,
5066 .release = tracing_single_release_tr,
5067 .write = tracing_trace_options_write,
5068 };
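/*
 * Illustrative usage from user space (assuming tracefs is mounted at
 * /sys/kernel/tracing):
 *
 *   # echo noirq-info > /sys/kernel/tracing/trace_options
 *   # cat /sys/kernel/tracing/trace_options
 *
 * Each write is handed to trace_set_options() above.
 */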
5069
5070 static const char readme_msg[] =
5071 "tracing mini-HOWTO:\n\n"
5072 "# echo 0 > tracing_on : quick way to disable tracing\n"
5073 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5074 " Important files:\n"
5075 " trace\t\t\t- The static contents of the buffer\n"
5076 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5077 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5078 " current_tracer\t- function and latency tracers\n"
5079 " available_tracers\t- list of configured tracers for current_tracer\n"
5080 " error_log\t- error log for failed commands (that support it)\n"
5081 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5082 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5083 " trace_clock\t\t-change the clock used to order events\n"
5084 " local: Per cpu clock but may not be synced across CPUs\n"
5085 " global: Synced across CPUs but slows tracing down.\n"
5086 " counter: Not a clock, but just an increment\n"
5087 " uptime: Jiffy counter from time of boot\n"
5088 " perf: Same clock that perf events use\n"
5089 #ifdef CONFIG_X86_64
5090 " x86-tsc: TSC cycle counter\n"
5091 #endif
5092 "\n timestamp_mode\t-view the mode used to timestamp events\n"
5093 " delta: Delta difference against a buffer-wide timestamp\n"
5094 " absolute: Absolute (standalone) timestamp\n"
5095 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5096 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5097 " tracing_cpumask\t- Limit which CPUs to trace\n"
5098 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5099 "\t\t\t Remove sub-buffer with rmdir\n"
5100 " trace_options\t\t- Set format or modify how tracing happens\n"
5101 "\t\t\t Disable an option by prefixing 'no' to the\n"
5102 "\t\t\t option name\n"
5103 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5104 #ifdef CONFIG_DYNAMIC_FTRACE
5105 "\n available_filter_functions - list of functions that can be filtered on\n"
5106 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5107 "\t\t\t functions\n"
5108 "\t accepts: func_full_name or glob-matching-pattern\n"
5109 "\t modules: Can select a group via module\n"
5110 "\t Format: :mod:<module-name>\n"
5111 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5112 "\t triggers: a command to perform when function is hit\n"
5113 "\t Format: <function>:<trigger>[:count]\n"
5114 "\t trigger: traceon, traceoff\n"
5115 "\t\t enable_event:<system>:<event>\n"
5116 "\t\t disable_event:<system>:<event>\n"
5117 #ifdef CONFIG_STACKTRACE
5118 "\t\t stacktrace\n"
5119 #endif
5120 #ifdef CONFIG_TRACER_SNAPSHOT
5121 "\t\t snapshot\n"
5122 #endif
5123 "\t\t dump\n"
5124 "\t\t cpudump\n"
5125 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5126 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5127 "\t The first one will disable tracing every time do_fault is hit\n"
5128 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5129 "\t The first time do trap is hit and it disables tracing, the\n"
5130 "\t counter will decrement to 2. If tracing is already disabled,\n"
5131 "\t the counter will not decrement. It only decrements when the\n"
5132 "\t trigger did work\n"
5133 "\t To remove trigger without count:\n"
5134 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
5135 "\t To remove trigger with a count:\n"
5136 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5137 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5138 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5139 "\t modules: Can select a group via module command :mod:\n"
5140 "\t Does not accept triggers\n"
5141 #endif /* CONFIG_DYNAMIC_FTRACE */
5142 #ifdef CONFIG_FUNCTION_TRACER
5143 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5144 "\t\t (function)\n"
5145 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5146 "\t\t (function)\n"
5147 #endif
5148 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5149 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5150 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5151 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5152 #endif
5153 #ifdef CONFIG_TRACER_SNAPSHOT
5154 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5155 "\t\t\t snapshot buffer. Read the contents for more\n"
5156 "\t\t\t information\n"
5157 #endif
5158 #ifdef CONFIG_STACK_TRACER
5159 " stack_trace\t\t- Shows the max stack trace when active\n"
5160 " stack_max_size\t- Shows current max stack size that was traced\n"
5161 "\t\t\t Write into this file to reset the max size (trigger a\n"
5162 "\t\t\t new trace)\n"
5163 #ifdef CONFIG_DYNAMIC_FTRACE
5164 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5165 "\t\t\t traces\n"
5166 #endif
5167 #endif /* CONFIG_STACK_TRACER */
5168 #ifdef CONFIG_DYNAMIC_EVENTS
5169 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5170 "\t\t\t Write into this file to define/undefine new trace events.\n"
5171 #endif
5172 #ifdef CONFIG_KPROBE_EVENTS
5173 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5174 "\t\t\t Write into this file to define/undefine new trace events.\n"
5175 #endif
5176 #ifdef CONFIG_UPROBE_EVENTS
5177 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5178 "\t\t\t Write into this file to define/undefine new trace events.\n"
5179 #endif
5180 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5181 "\t accepts: event-definitions (one definition per line)\n"
5182 "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
5183 "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5184 #ifdef CONFIG_HIST_TRIGGERS
5185 "\t s:[synthetic/]<event> <field> [<field>]\n"
5186 #endif
5187 "\t -:[<group>/]<event>\n"
5188 #ifdef CONFIG_KPROBE_EVENTS
5189 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5190 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5191 #endif
5192 #ifdef CONFIG_UPROBE_EVENTS
5193 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5194 #endif
5195 "\t args: <name>=fetcharg[:type]\n"
5196 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5197 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5198 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5199 #else
5200 "\t $stack<index>, $stack, $retval, $comm,\n"
5201 #endif
5202 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5203 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5204 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5205 "\t <type>\\[<array-size>\\]\n"
5206 #ifdef CONFIG_HIST_TRIGGERS
5207 "\t field: <stype> <name>;\n"
5208 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5209 "\t [unsigned] char/int/long\n"
5210 #endif
5211 #endif
5212 " events/\t\t- Directory containing all trace event subsystems:\n"
5213 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5214 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5215 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5216 "\t\t\t events\n"
5217 " filter\t\t- If set, only events passing filter are traced\n"
5218 " events/<system>/<event>/\t- Directory containing control files for\n"
5219 "\t\t\t <event>:\n"
5220 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5221 " filter\t\t- If set, only events passing filter are traced\n"
5222 " trigger\t\t- If set, a command to perform when event is hit\n"
5223 "\t Format: <trigger>[:count][if <filter>]\n"
5224 "\t trigger: traceon, traceoff\n"
5225 "\t enable_event:<system>:<event>\n"
5226 "\t disable_event:<system>:<event>\n"
5227 #ifdef CONFIG_HIST_TRIGGERS
5228 "\t enable_hist:<system>:<event>\n"
5229 "\t disable_hist:<system>:<event>\n"
5230 #endif
5231 #ifdef CONFIG_STACKTRACE
5232 "\t\t stacktrace\n"
5233 #endif
5234 #ifdef CONFIG_TRACER_SNAPSHOT
5235 "\t\t snapshot\n"
5236 #endif
5237 #ifdef CONFIG_HIST_TRIGGERS
5238 "\t\t hist (see below)\n"
5239 #endif
5240 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5241 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5242 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5243 "\t events/block/block_unplug/trigger\n"
5244 "\t The first disables tracing every time block_unplug is hit.\n"
5245 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5246 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5247 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5248 "\t Like function triggers, the counter is only decremented if it\n"
5249 "\t enabled or disabled tracing.\n"
5250 "\t To remove a trigger without a count:\n"
5251 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5252 "\t To remove a trigger with a count:\n"
5253 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5254 "\t Filters can be ignored when removing a trigger.\n"
5255 #ifdef CONFIG_HIST_TRIGGERS
5256 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5257 "\t Format: hist:keys=<field1[,field2,...]>\n"
5258 "\t [:values=<field1[,field2,...]>]\n"
5259 "\t [:sort=<field1[,field2,...]>]\n"
5260 "\t [:size=#entries]\n"
5261 "\t [:pause][:continue][:clear]\n"
5262 "\t [:name=histname1]\n"
5263 "\t [:<handler>.<action>]\n"
5264 "\t [if <filter>]\n\n"
5265 "\t Note, special fields can be used as well:\n"
5266 "\t common_timestamp - to record current timestamp\n"
5267 "\t common_cpu - to record the CPU the event happened on\n"
5268 "\n"
5269 "\t When a matching event is hit, an entry is added to a hash\n"
5270 "\t table using the key(s) and value(s) named, and the value of a\n"
5271 "\t sum called 'hitcount' is incremented. Keys and values\n"
5272 "\t correspond to fields in the event's format description. Keys\n"
5273 "\t can be any field, or the special string 'stacktrace'.\n"
5274 "\t Compound keys consisting of up to two fields can be specified\n"
5275 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5276 "\t fields. Sort keys consisting of up to two fields can be\n"
5277 "\t specified using the 'sort' keyword. The sort direction can\n"
5278 "\t be modified by appending '.descending' or '.ascending' to a\n"
5279 "\t sort field. The 'size' parameter can be used to specify more\n"
5280 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5281 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5282 "\t its histogram data will be shared with other triggers of the\n"
5283 "\t same name, and trigger hits will update this common data.\n\n"
5284 "\t Reading the 'hist' file for the event will dump the hash\n"
5285 "\t table in its entirety to stdout. If there are multiple hist\n"
5286 "\t triggers attached to an event, there will be a table for each\n"
5287 "\t trigger in the output. The table displayed for a named\n"
5288 "\t trigger will be the same as any other instance having the\n"
5289 "\t same name. The default format used to display a given field\n"
5290 "\t can be modified by appending any of the following modifiers\n"
5291 "\t to the field name, as applicable:\n\n"
5292 "\t .hex display a number as a hex value\n"
5293 "\t .sym display an address as a symbol\n"
5294 "\t .sym-offset display an address as a symbol and offset\n"
5295 "\t .execname display a common_pid as a program name\n"
5296 "\t .syscall display a syscall id as a syscall name\n"
5297 "\t .log2 display log2 value rather than raw number\n"
5298 "\t .usecs display a common_timestamp in microseconds\n\n"
5299 "\t The 'pause' parameter can be used to pause an existing hist\n"
5300 "\t trigger or to start a hist trigger but not log any events\n"
5301 "\t until told to do so. 'continue' can be used to start or\n"
5302 "\t restart a paused hist trigger.\n\n"
5303 "\t The 'clear' parameter will clear the contents of a running\n"
5304 "\t hist trigger and leave its current paused/active state\n"
5305 "\t unchanged.\n\n"
5306 "\t The enable_hist and disable_hist triggers can be used to\n"
5307 "\t have one event conditionally start and stop another event's\n"
5308 "\t already-attached hist trigger. The syntax is analogous to\n"
5309 "\t the enable_event and disable_event triggers.\n\n"
5310 "\t Hist trigger handlers and actions are executed whenever\n"
5311 "\t a histogram entry is added or updated. They take the form:\n\n"
5312 "\t <handler>.<action>\n\n"
5313 "\t The available handlers are:\n\n"
5314 "\t onmatch(matching.event) - invoke on addition or update\n"
5315 "\t onmax(var) - invoke if var exceeds current max\n"
5316 "\t onchange(var) - invoke action if var changes\n\n"
5317 "\t The available actions are:\n\n"
5318 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5319 "\t save(field,...) - save current event fields\n"
5320 #ifdef CONFIG_TRACER_SNAPSHOT
5321 "\t snapshot() - snapshot the trace buffer\n\n"
5322 #endif
5323 #ifdef CONFIG_SYNTH_EVENTS
5324 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5325 "\t Write into this file to define/undefine new synthetic events.\n"
5326 "\t example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5327 #endif
5328 #endif
5329 ;
5330
5331 static ssize_t
5332 tracing_readme_read(struct file *filp, char __user *ubuf,
5333 size_t cnt, loff_t *ppos)
5334 {
5335 return simple_read_from_buffer(ubuf, cnt, ppos,
5336 readme_msg, strlen(readme_msg));
5337 }
5338
5339 static const struct file_operations tracing_readme_fops = {
5340 .open = tracing_open_generic,
5341 .read = tracing_readme_read,
5342 .llseek = generic_file_llseek,
5343 };
5344
5345 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5346 {
5347 int pid = ++(*pos);
5348
5349 return trace_find_tgid_ptr(pid);
5350 }
5351
5352 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5353 {
5354 int pid = *pos;
5355
5356 return trace_find_tgid_ptr(pid);
5357 }
5358
5359 static void saved_tgids_stop(struct seq_file *m, void *v)
5360 {
5361 }
5362
5363 static int saved_tgids_show(struct seq_file *m, void *v)
5364 {
5365 int *entry = (int *)v;
5366 int pid = entry - tgid_map;
5367 int tgid = *entry;
5368
5369 if (tgid == 0)
5370 return SEQ_SKIP;
5371
5372 seq_printf(m, "%d %d\n", pid, tgid);
5373 return 0;
5374 }
5375
5376 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5377 .start = saved_tgids_start,
5378 .stop = saved_tgids_stop,
5379 .next = saved_tgids_next,
5380 .show = saved_tgids_show,
5381 };
5382
5383 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5384 {
5385 int ret;
5386
5387 ret = tracing_check_open_get_tr(NULL);
5388 if (ret)
5389 return ret;
5390
5391 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5392 }
5393
5394
5395 static const struct file_operations tracing_saved_tgids_fops = {
5396 .open = tracing_saved_tgids_open,
5397 .read = seq_read,
5398 .llseek = seq_lseek,
5399 .release = seq_release,
5400 };
5401
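/*
 * The saved_cmdlines seq_file below walks savedcmd->map_cmdline_to_pid,
 * skipping slots with no mapping (-1 / NO_CMDLINE_MAP), and prints one
 * "<pid> <comm>" pair per entry. The whole walk runs with preemption
 * disabled and trace_cmdline_lock held (taken in start, released in stop).
 */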
5402 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5403 {
5404 unsigned int *ptr = v;
5405
5406 if (*pos || m->count)
5407 ptr++;
5408
5409 (*pos)++;
5410
5411 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5412 ptr++) {
5413 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5414 continue;
5415
5416 return ptr;
5417 }
5418
5419 return NULL;
5420 }
5421
5422 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5423 {
5424 void *v;
5425 loff_t l = 0;
5426
5427 preempt_disable();
5428 arch_spin_lock(&trace_cmdline_lock);
5429
5430 v = &savedcmd->map_cmdline_to_pid[0];
5431 while (l <= *pos) {
5432 v = saved_cmdlines_next(m, v, &l);
5433 if (!v)
5434 return NULL;
5435 }
5436
5437 return v;
5438 }
5439
5440 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5441 {
5442 arch_spin_unlock(&trace_cmdline_lock);
5443 preempt_enable();
5444 }
5445
5446 static int saved_cmdlines_show(struct seq_file *m, void *v)
5447 {
5448 char buf[TASK_COMM_LEN];
5449 unsigned int *pid = v;
5450
5451 __trace_find_cmdline(*pid, buf);
5452 seq_printf(m, "%d %s\n", *pid, buf);
5453 return 0;
5454 }
5455
5456 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5457 .start = saved_cmdlines_start,
5458 .next = saved_cmdlines_next,
5459 .stop = saved_cmdlines_stop,
5460 .show = saved_cmdlines_show,
5461 };
5462
5463 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5464 {
5465 int ret;
5466
5467 ret = tracing_check_open_get_tr(NULL);
5468 if (ret)
5469 return ret;
5470
5471 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5472 }
5473
5474 static const struct file_operations tracing_saved_cmdlines_fops = {
5475 .open = tracing_saved_cmdlines_open,
5476 .read = seq_read,
5477 .llseek = seq_lseek,
5478 .release = seq_release,
5479 };
5480
5481 static ssize_t
5482 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5483 size_t cnt, loff_t *ppos)
5484 {
5485 char buf[64];
5486 int r;
5487
5488 preempt_disable();
5489 arch_spin_lock(&trace_cmdline_lock);
5490 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5491 arch_spin_unlock(&trace_cmdline_lock);
5492 preempt_enable();
5493
5494 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5495 }
5496
5497 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5498 {
5499 kfree(s->saved_cmdlines);
5500 kfree(s->map_cmdline_to_pid);
5501 kfree(s);
5502 }
5503
5504 static int tracing_resize_saved_cmdlines(unsigned int val)
5505 {
5506 struct saved_cmdlines_buffer *s, *savedcmd_temp;
5507
5508 s = kmalloc(sizeof(*s), GFP_KERNEL);
5509 if (!s)
5510 return -ENOMEM;
5511
5512 if (allocate_cmdlines_buffer(val, s) < 0) {
5513 kfree(s);
5514 return -ENOMEM;
5515 }
5516
5517 preempt_disable();
5518 arch_spin_lock(&trace_cmdline_lock);
5519 savedcmd_temp = savedcmd;
5520 savedcmd = s;
5521 arch_spin_unlock(&trace_cmdline_lock);
5522 preempt_enable();
5523 free_saved_cmdlines_buffer(savedcmd_temp);
5524
5525 return 0;
5526 }
5527
5528 static ssize_t
5529 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5530 size_t cnt, loff_t *ppos)
5531 {
5532 unsigned long val;
5533 int ret;
5534
5535 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5536 if (ret)
5537 return ret;
5538
5539 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5540 if (!val || val > PID_MAX_DEFAULT)
5541 return -EINVAL;
5542
5543 ret = tracing_resize_saved_cmdlines((unsigned int)val);
5544 if (ret < 0)
5545 return ret;
5546
5547 *ppos += cnt;
5548
5549 return cnt;
5550 }
5551
5552 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5553 .open = tracing_open_generic,
5554 .read = tracing_saved_cmdlines_size_read,
5555 .write = tracing_saved_cmdlines_size_write,
5556 };
5557
5558 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5559 static union trace_eval_map_item *
5560 update_eval_map(union trace_eval_map_item *ptr)
5561 {
5562 if (!ptr->map.eval_string) {
5563 if (ptr->tail.next) {
5564 ptr = ptr->tail.next;
5565 /* Set ptr to the next real item (skip head) */
5566 ptr++;
5567 } else
5568 return NULL;
5569 }
5570 return ptr;
5571 }
5572
5573 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5574 {
5575 union trace_eval_map_item *ptr = v;
5576
5577 /*
5578 * Paranoid! If ptr points to end, we don't want to increment past it.
5579 * This really should never happen.
5580 */
5581 (*pos)++;
5582 ptr = update_eval_map(ptr);
5583 if (WARN_ON_ONCE(!ptr))
5584 return NULL;
5585
5586 ptr++;
5587 ptr = update_eval_map(ptr);
5588
5589 return ptr;
5590 }
5591
5592 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5593 {
5594 union trace_eval_map_item *v;
5595 loff_t l = 0;
5596
5597 mutex_lock(&trace_eval_mutex);
5598
5599 v = trace_eval_maps;
5600 if (v)
5601 v++;
5602
5603 while (v && l < *pos) {
5604 v = eval_map_next(m, v, &l);
5605 }
5606
5607 return v;
5608 }
5609
5610 static void eval_map_stop(struct seq_file *m, void *v)
5611 {
5612 mutex_unlock(&trace_eval_mutex);
5613 }
5614
5615 static int eval_map_show(struct seq_file *m, void *v)
5616 {
5617 union trace_eval_map_item *ptr = v;
5618
5619 seq_printf(m, "%s %ld (%s)\n",
5620 ptr->map.eval_string, ptr->map.eval_value,
5621 ptr->map.system);
5622
5623 return 0;
5624 }
5625
5626 static const struct seq_operations tracing_eval_map_seq_ops = {
5627 .start = eval_map_start,
5628 .next = eval_map_next,
5629 .stop = eval_map_stop,
5630 .show = eval_map_show,
5631 };
5632
5633 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5634 {
5635 int ret;
5636
5637 ret = tracing_check_open_get_tr(NULL);
5638 if (ret)
5639 return ret;
5640
5641 return seq_open(filp, &tracing_eval_map_seq_ops);
5642 }
5643
5644 static const struct file_operations tracing_eval_map_fops = {
5645 .open = tracing_eval_map_open,
5646 .read = seq_read,
5647 .llseek = seq_lseek,
5648 .release = seq_release,
5649 };
5650
5651 static inline union trace_eval_map_item *
5652 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5653 {
5654 /* Return tail of array given the head */
5655 return ptr + ptr->head.length + 1;
5656 }
5657
5658 static void
5659 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5660 int len)
5661 {
5662 struct trace_eval_map **stop;
5663 struct trace_eval_map **map;
5664 union trace_eval_map_item *map_array;
5665 union trace_eval_map_item *ptr;
5666
5667 stop = start + len;
5668
5669 /*
5670 * The trace_eval_maps contains the map plus a head and tail item,
5671 * where the head holds the module and length of array, and the
5672 * tail holds a pointer to the next list.
5673 */
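	/*
	 * Layout of one such allocation (len + 2 items):
	 *
	 *   [ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next ]
	 *
	 * trace_eval_jmp_to_tail() steps from the head over the maps to the
	 * tail, and the zeroed tail below terminates the chain until another
	 * array is linked in through tail.next.
	 */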
5674 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5675 if (!map_array) {
5676 pr_warn("Unable to allocate trace eval mapping\n");
5677 return;
5678 }
5679
5680 mutex_lock(&trace_eval_mutex);
5681
5682 if (!trace_eval_maps)
5683 trace_eval_maps = map_array;
5684 else {
5685 ptr = trace_eval_maps;
5686 for (;;) {
5687 ptr = trace_eval_jmp_to_tail(ptr);
5688 if (!ptr->tail.next)
5689 break;
5690 ptr = ptr->tail.next;
5691
5692 }
5693 ptr->tail.next = map_array;
5694 }
5695 map_array->head.mod = mod;
5696 map_array->head.length = len;
5697 map_array++;
5698
5699 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5700 map_array->map = **map;
5701 map_array++;
5702 }
5703 memset(map_array, 0, sizeof(*map_array));
5704
5705 mutex_unlock(&trace_eval_mutex);
5706 }
5707
5708 static void trace_create_eval_file(struct dentry *d_tracer)
5709 {
5710 trace_create_file("eval_map", 0444, d_tracer,
5711 NULL, &tracing_eval_map_fops);
5712 }
5713
5714 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5715 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5716 static inline void trace_insert_eval_map_file(struct module *mod,
5717 struct trace_eval_map **start, int len) { }
5718 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5719
5720 static void trace_insert_eval_map(struct module *mod,
5721 struct trace_eval_map **start, int len)
5722 {
5723 struct trace_eval_map **map;
5724
5725 if (len <= 0)
5726 return;
5727
5728 map = start;
5729
5730 trace_event_eval_update(map, len);
5731
5732 trace_insert_eval_map_file(mod, start, len);
5733 }
5734
5735 static ssize_t
5736 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5737 size_t cnt, loff_t *ppos)
5738 {
5739 struct trace_array *tr = filp->private_data;
5740 char buf[MAX_TRACER_SIZE+2];
5741 int r;
5742
5743 mutex_lock(&trace_types_lock);
5744 r = sprintf(buf, "%s\n", tr->current_trace->name);
5745 mutex_unlock(&trace_types_lock);
5746
5747 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5748 }
5749
5750 int tracer_init(struct tracer *t, struct trace_array *tr)
5751 {
5752 tracing_reset_online_cpus(&tr->array_buffer);
5753 return t->init(tr);
5754 }
5755
5756 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5757 {
5758 int cpu;
5759
5760 for_each_tracing_cpu(cpu)
5761 per_cpu_ptr(buf->data, cpu)->entries = val;
5762 }
5763
5764 #ifdef CONFIG_TRACER_MAX_TRACE
5765 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5766 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5767 struct array_buffer *size_buf, int cpu_id)
5768 {
5769 int cpu, ret = 0;
5770
5771 if (cpu_id == RING_BUFFER_ALL_CPUS) {
5772 for_each_tracing_cpu(cpu) {
5773 ret = ring_buffer_resize(trace_buf->buffer,
5774 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5775 if (ret < 0)
5776 break;
5777 per_cpu_ptr(trace_buf->data, cpu)->entries =
5778 per_cpu_ptr(size_buf->data, cpu)->entries;
5779 }
5780 } else {
5781 ret = ring_buffer_resize(trace_buf->buffer,
5782 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5783 if (ret == 0)
5784 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5785 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5786 }
5787
5788 return ret;
5789 }
5790 #endif /* CONFIG_TRACER_MAX_TRACE */
5791
5792 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5793 unsigned long size, int cpu)
5794 {
5795 int ret;
5796
5797 /*
5798 * If kernel or user changes the size of the ring buffer
5799 * we use the size that was given, and we can forget about
5800 * expanding it later.
5801 */
5802 ring_buffer_expanded = true;
5803
5804 /* May be called before buffers are initialized */
5805 if (!tr->array_buffer.buffer)
5806 return 0;
5807
5808 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5809 if (ret < 0)
5810 return ret;
5811
5812 #ifdef CONFIG_TRACER_MAX_TRACE
5813 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5814 !tr->current_trace->use_max_tr)
5815 goto out;
5816
5817 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5818 if (ret < 0) {
5819 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5820 &tr->array_buffer, cpu);
5821 if (r < 0) {
5822 /*
5823 * AARGH! We are left with different
5824 * size max buffer!!!!
5825 * The max buffer is our "snapshot" buffer.
5826 * When a tracer needs a snapshot (one of the
5827 * latency tracers), it swaps the max buffer
5828 				 * with the saved snapshot. We succeeded in
5829 				 * updating the size of the main buffer, but failed to
5830 				 * update the size of the max buffer. But when we tried
5831 * to reset the main buffer to the original size, we
5832 * failed there too. This is very unlikely to
5833 * happen, but if it does, warn and kill all
5834 * tracing.
5835 */
5836 WARN_ON(1);
5837 tracing_disabled = 1;
5838 }
5839 return ret;
5840 }
5841
5842 if (cpu == RING_BUFFER_ALL_CPUS)
5843 set_buffer_entries(&tr->max_buffer, size);
5844 else
5845 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5846
5847 out:
5848 #endif /* CONFIG_TRACER_MAX_TRACE */
5849
5850 if (cpu == RING_BUFFER_ALL_CPUS)
5851 set_buffer_entries(&tr->array_buffer, size);
5852 else
5853 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5854
5855 return ret;
5856 }
5857
5858 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5859 unsigned long size, int cpu_id)
5860 {
5861 int ret = size;
5862
5863 mutex_lock(&trace_types_lock);
5864
5865 if (cpu_id != RING_BUFFER_ALL_CPUS) {
5866 		/* make sure this CPU is enabled in the mask */
5867 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5868 ret = -EINVAL;
5869 goto out;
5870 }
5871 }
5872
5873 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5874 if (ret < 0)
5875 ret = -ENOMEM;
5876
5877 out:
5878 mutex_unlock(&trace_types_lock);
5879
5880 return ret;
5881 }
5882
5883
5884 /**
5885 * tracing_update_buffers - used by tracing facility to expand ring buffers
5886 *
5887  * To save memory when tracing is never used on a system that has it
5888  * configured in, the ring buffers are set to a minimum size. Once
5889  * a user starts to use the tracing facility, they need to grow
5890  * to their default size.
5891 *
5892 * This function is to be called when a tracer is about to be used.
5893 */
5894 int tracing_update_buffers(void)
5895 {
5896 int ret = 0;
5897
5898 mutex_lock(&trace_types_lock);
5899 if (!ring_buffer_expanded)
5900 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5901 RING_BUFFER_ALL_CPUS);
5902 mutex_unlock(&trace_types_lock);
5903
5904 return ret;
5905 }
5906
5907 struct trace_option_dentry;
5908
5909 static void
5910 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5911
5912 /*
5913 * Used to clear out the tracer before deletion of an instance.
5914 * Must have trace_types_lock held.
5915 */
5916 static void tracing_set_nop(struct trace_array *tr)
5917 {
5918 if (tr->current_trace == &nop_trace)
5919 return;
5920
5921 tr->current_trace->enabled--;
5922
5923 if (tr->current_trace->reset)
5924 tr->current_trace->reset(tr);
5925
5926 tr->current_trace = &nop_trace;
5927 }
5928
5929 static bool tracer_options_updated;
5930
5931 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5932 {
5933 /* Only enable if the directory has been created already. */
5934 if (!tr->dir)
5935 return;
5936
5937 	/* Only create trace option files after update_tracer_options() has finished */
5938 if (!tracer_options_updated)
5939 return;
5940
5941 create_trace_option_files(tr, t);
5942 }
5943
5944 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5945 {
5946 struct tracer *t;
5947 #ifdef CONFIG_TRACER_MAX_TRACE
5948 bool had_max_tr;
5949 #endif
5950 int ret = 0;
5951
5952 mutex_lock(&trace_types_lock);
5953
5954 if (!ring_buffer_expanded) {
5955 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5956 RING_BUFFER_ALL_CPUS);
5957 if (ret < 0)
5958 goto out;
5959 ret = 0;
5960 }
5961
5962 for (t = trace_types; t; t = t->next) {
5963 if (strcmp(t->name, buf) == 0)
5964 break;
5965 }
5966 if (!t) {
5967 ret = -EINVAL;
5968 goto out;
5969 }
5970 if (t == tr->current_trace)
5971 goto out;
5972
5973 #ifdef CONFIG_TRACER_SNAPSHOT
5974 if (t->use_max_tr) {
5975 local_irq_disable();
5976 arch_spin_lock(&tr->max_lock);
5977 if (tr->cond_snapshot)
5978 ret = -EBUSY;
5979 arch_spin_unlock(&tr->max_lock);
5980 local_irq_enable();
5981 if (ret)
5982 goto out;
5983 }
5984 #endif
5985 /* Some tracers won't work on kernel command line */
5986 if (system_state < SYSTEM_RUNNING && t->noboot) {
5987 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5988 t->name);
5989 goto out;
5990 }
5991
5992 /* Some tracers are only allowed for the top level buffer */
5993 if (!trace_ok_for_array(t, tr)) {
5994 ret = -EINVAL;
5995 goto out;
5996 }
5997
5998 /* If trace pipe files are being read, we can't change the tracer */
5999 if (tr->trace_ref) {
6000 ret = -EBUSY;
6001 goto out;
6002 }
6003
6004 trace_branch_disable();
6005
6006 tr->current_trace->enabled--;
6007
6008 if (tr->current_trace->reset)
6009 tr->current_trace->reset(tr);
6010
6011 #ifdef CONFIG_TRACER_MAX_TRACE
6012 had_max_tr = tr->current_trace->use_max_tr;
6013
6014 /* Current trace needs to be nop_trace before synchronize_rcu */
6015 tr->current_trace = &nop_trace;
6016
6017 if (had_max_tr && !t->use_max_tr) {
6018 /*
6019 * We need to make sure that the update_max_tr sees that
6020 * current_trace changed to nop_trace to keep it from
6021 * swapping the buffers after we resize it.
6022 		 * update_max_tr() is called with interrupts disabled,
6023 		 * so a synchronize_rcu() is sufficient.
6024 */
6025 synchronize_rcu();
6026 free_snapshot(tr);
6027 }
6028
6029 if (t->use_max_tr && !tr->allocated_snapshot) {
6030 ret = tracing_alloc_snapshot_instance(tr);
6031 if (ret < 0)
6032 goto out;
6033 }
6034 #else
6035 tr->current_trace = &nop_trace;
6036 #endif
6037
6038 if (t->init) {
6039 ret = tracer_init(t, tr);
6040 if (ret)
6041 goto out;
6042 }
6043
6044 tr->current_trace = t;
6045 tr->current_trace->enabled++;
6046 trace_branch_enable(tr);
6047 out:
6048 mutex_unlock(&trace_types_lock);
6049
6050 return ret;
6051 }
6052
6053 static ssize_t
6054 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6055 size_t cnt, loff_t *ppos)
6056 {
6057 struct trace_array *tr = filp->private_data;
6058 char buf[MAX_TRACER_SIZE+1];
6059 int i;
6060 size_t ret;
6061 int err;
6062
6063 ret = cnt;
6064
6065 if (cnt > MAX_TRACER_SIZE)
6066 cnt = MAX_TRACER_SIZE;
6067
6068 if (copy_from_user(buf, ubuf, cnt))
6069 return -EFAULT;
6070
6071 buf[cnt] = 0;
6072
6073 /* strip ending whitespace. */
6074 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6075 buf[i] = 0;
6076
6077 err = tracing_set_tracer(tr, buf);
6078 if (err)
6079 return err;
6080
6081 *ppos += ret;
6082
6083 return ret;
6084 }
6085
6086 static ssize_t
6087 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6088 size_t cnt, loff_t *ppos)
6089 {
6090 char buf[64];
6091 int r;
6092
6093 r = snprintf(buf, sizeof(buf), "%ld\n",
6094 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6095 if (r > sizeof(buf))
6096 r = sizeof(buf);
6097 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6098 }
6099
6100 static ssize_t
6101 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6102 size_t cnt, loff_t *ppos)
6103 {
6104 unsigned long val;
6105 int ret;
6106
6107 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6108 if (ret)
6109 return ret;
6110
6111 *ptr = val * 1000;
6112
6113 return cnt;
6114 }
6115
6116 static ssize_t
6117 tracing_thresh_read(struct file *filp, char __user *ubuf,
6118 size_t cnt, loff_t *ppos)
6119 {
6120 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6121 }
6122
6123 static ssize_t
6124 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6125 size_t cnt, loff_t *ppos)
6126 {
6127 struct trace_array *tr = filp->private_data;
6128 int ret;
6129
6130 mutex_lock(&trace_types_lock);
6131 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6132 if (ret < 0)
6133 goto out;
6134
6135 if (tr->current_trace->update_thresh) {
6136 ret = tr->current_trace->update_thresh(tr);
6137 if (ret < 0)
6138 goto out;
6139 }
6140
6141 ret = cnt;
6142 out:
6143 mutex_unlock(&trace_types_lock);
6144
6145 return ret;
6146 }
6147
6148 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6149
6150 static ssize_t
6151 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6152 size_t cnt, loff_t *ppos)
6153 {
6154 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6155 }
6156
6157 static ssize_t
6158 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6159 size_t cnt, loff_t *ppos)
6160 {
6161 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6162 }
6163
6164 #endif
6165
6166 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6167 {
6168 struct trace_array *tr = inode->i_private;
6169 struct trace_iterator *iter;
6170 int ret;
6171
6172 ret = tracing_check_open_get_tr(tr);
6173 if (ret)
6174 return ret;
6175
6176 mutex_lock(&trace_types_lock);
6177
6178 /* create a buffer to store the information to pass to userspace */
6179 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6180 if (!iter) {
6181 ret = -ENOMEM;
6182 __trace_array_put(tr);
6183 goto out;
6184 }
6185
6186 trace_seq_init(&iter->seq);
6187 iter->trace = tr->current_trace;
6188
6189 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6190 ret = -ENOMEM;
6191 goto fail;
6192 }
6193
6194 /* trace pipe does not show start of buffer */
6195 cpumask_setall(iter->started);
6196
6197 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6198 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6199
6200 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6201 if (trace_clocks[tr->clock_id].in_ns)
6202 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6203
6204 iter->tr = tr;
6205 iter->array_buffer = &tr->array_buffer;
6206 iter->cpu_file = tracing_get_cpu(inode);
6207 mutex_init(&iter->mutex);
6208 filp->private_data = iter;
6209
6210 if (iter->trace->pipe_open)
6211 iter->trace->pipe_open(iter);
6212
6213 nonseekable_open(inode, filp);
6214
6215 tr->trace_ref++;
6216 out:
6217 mutex_unlock(&trace_types_lock);
6218 return ret;
6219
6220 fail:
6221 kfree(iter);
6222 __trace_array_put(tr);
6223 mutex_unlock(&trace_types_lock);
6224 return ret;
6225 }
6226
6227 static int tracing_release_pipe(struct inode *inode, struct file *file)
6228 {
6229 struct trace_iterator *iter = file->private_data;
6230 struct trace_array *tr = inode->i_private;
6231
6232 mutex_lock(&trace_types_lock);
6233
6234 tr->trace_ref--;
6235
6236 if (iter->trace->pipe_close)
6237 iter->trace->pipe_close(iter);
6238
6239 mutex_unlock(&trace_types_lock);
6240
6241 free_cpumask_var(iter->started);
6242 mutex_destroy(&iter->mutex);
6243 kfree(iter);
6244
6245 trace_array_put(tr);
6246
6247 return 0;
6248 }
6249
6250 static __poll_t
6251 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6252 {
6253 struct trace_array *tr = iter->tr;
6254
6255 /* Iterators are static, they should be filled or empty */
6256 if (trace_buffer_iter(iter, iter->cpu_file))
6257 return EPOLLIN | EPOLLRDNORM;
6258
6259 if (tr->trace_flags & TRACE_ITER_BLOCK)
6260 /*
6261 * Always select as readable when in blocking mode
6262 */
6263 return EPOLLIN | EPOLLRDNORM;
6264 else
6265 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6266 filp, poll_table, iter->tr->buffer_percent);
6267 }
6268
6269 static __poll_t
6270 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6271 {
6272 struct trace_iterator *iter = filp->private_data;
6273
6274 return trace_poll(iter, filp, poll_table);
6275 }
6276
6277 /* Must be called with iter->mutex held. */
6278 static int tracing_wait_pipe(struct file *filp)
6279 {
6280 struct trace_iterator *iter = filp->private_data;
6281 int ret;
6282
6283 while (trace_empty(iter)) {
6284
6285 if ((filp->f_flags & O_NONBLOCK)) {
6286 return -EAGAIN;
6287 }
6288
6289 /*
6290 * We block until we read something and tracing is disabled.
6291 * We still block if tracing is disabled, but we have never
6292 * read anything. This allows a user to cat this file, and
6293 * then enable tracing. But after we have read something,
6294 * we give an EOF when tracing is again disabled.
6295 *
6296 * iter->pos will be 0 if we haven't read anything.
6297 */
6298 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6299 break;
6300
6301 mutex_unlock(&iter->mutex);
6302
6303 ret = wait_on_pipe(iter, 0);
6304
6305 mutex_lock(&iter->mutex);
6306
6307 if (ret)
6308 return ret;
6309 }
6310
6311 return 1;
6312 }
6313
6314 /*
6315 * Consumer reader.
6316 */
6317 static ssize_t
6318 tracing_read_pipe(struct file *filp, char __user *ubuf,
6319 size_t cnt, loff_t *ppos)
6320 {
6321 struct trace_iterator *iter = filp->private_data;
6322 ssize_t sret;
6323
6324 /*
6325 * Avoid more than one consumer on a single file descriptor
6326 * This is just a matter of traces coherency, the ring buffer itself
6327 * is protected.
6328 */
6329 mutex_lock(&iter->mutex);
6330
6331 /* return any leftover data */
6332 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6333 if (sret != -EBUSY)
6334 goto out;
6335
6336 trace_seq_init(&iter->seq);
6337
6338 if (iter->trace->read) {
6339 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6340 if (sret)
6341 goto out;
6342 }
6343
6344 waitagain:
6345 sret = tracing_wait_pipe(filp);
6346 if (sret <= 0)
6347 goto out;
6348
6349 /* stop when tracing is finished */
6350 if (trace_empty(iter)) {
6351 sret = 0;
6352 goto out;
6353 }
6354
6355 if (cnt >= PAGE_SIZE)
6356 cnt = PAGE_SIZE - 1;
6357
6358 /* reset all but tr, trace, and overruns */
6359 memset(&iter->seq, 0,
6360 sizeof(struct trace_iterator) -
6361 offsetof(struct trace_iterator, seq));
6362 cpumask_clear(iter->started);
6363 trace_seq_init(&iter->seq);
6364 iter->pos = -1;
6365
6366 trace_event_read_lock();
6367 trace_access_lock(iter->cpu_file);
6368 while (trace_find_next_entry_inc(iter) != NULL) {
6369 enum print_line_t ret;
6370 int save_len = iter->seq.seq.len;
6371
6372 ret = print_trace_line(iter);
6373 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6374 /*
6375 			 * If one print_trace_line() fills the entire trace_seq in one shot,
6376 			 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6377 			 * In this case, we need to consume it; otherwise, the loop will
6378 			 * peek at this event again next time, resulting in an infinite loop.
6379 */
6380 if (save_len == 0) {
6381 iter->seq.full = 0;
6382 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6383 trace_consume(iter);
6384 break;
6385 }
6386
6387 /* In other cases, don't print partial lines */
6388 iter->seq.seq.len = save_len;
6389 break;
6390 }
6391 if (ret != TRACE_TYPE_NO_CONSUME)
6392 trace_consume(iter);
6393
6394 if (trace_seq_used(&iter->seq) >= cnt)
6395 break;
6396
6397 /*
6398 * Setting the full flag means we reached the trace_seq buffer
6399 		 * size and we should have left via the partial-output condition above.
6400 * One of the trace_seq_* functions is not used properly.
6401 */
6402 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6403 iter->ent->type);
6404 }
6405 trace_access_unlock(iter->cpu_file);
6406 trace_event_read_unlock();
6407
6408 /* Now copy what we have to the user */
6409 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6410 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6411 trace_seq_init(&iter->seq);
6412
6413 /*
6414 * If there was nothing to send to user, in spite of consuming trace
6415 * entries, go back to wait for more entries.
6416 */
6417 if (sret == -EBUSY)
6418 goto waitagain;
6419
6420 out:
6421 mutex_unlock(&iter->mutex);
6422
6423 return sret;
6424 }
6425
6426 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6427 unsigned int idx)
6428 {
6429 __free_page(spd->pages[idx]);
6430 }
6431
6432 static size_t
6433 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6434 {
6435 size_t count;
6436 int save_len;
6437 int ret;
6438
6439 /* Seq buffer is page-sized, exactly what we need. */
6440 for (;;) {
6441 save_len = iter->seq.seq.len;
6442 ret = print_trace_line(iter);
6443
6444 if (trace_seq_has_overflowed(&iter->seq)) {
6445 iter->seq.seq.len = save_len;
6446 break;
6447 }
6448
6449 /*
6450 * This should not be hit, because it should only
6451 * be set if the iter->seq overflowed. But check it
6452 * anyway to be safe.
6453 */
6454 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6455 iter->seq.seq.len = save_len;
6456 break;
6457 }
6458
6459 count = trace_seq_used(&iter->seq) - save_len;
6460 if (rem < count) {
6461 rem = 0;
6462 iter->seq.seq.len = save_len;
6463 break;
6464 }
6465
6466 if (ret != TRACE_TYPE_NO_CONSUME)
6467 trace_consume(iter);
6468 rem -= count;
6469 if (!trace_find_next_entry_inc(iter)) {
6470 rem = 0;
6471 iter->ent = NULL;
6472 break;
6473 }
6474 }
6475
6476 return rem;
6477 }
6478
6479 static ssize_t tracing_splice_read_pipe(struct file *filp,
6480 loff_t *ppos,
6481 struct pipe_inode_info *pipe,
6482 size_t len,
6483 unsigned int flags)
6484 {
6485 struct page *pages_def[PIPE_DEF_BUFFERS];
6486 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6487 struct trace_iterator *iter = filp->private_data;
6488 struct splice_pipe_desc spd = {
6489 .pages = pages_def,
6490 .partial = partial_def,
6491 .nr_pages = 0, /* This gets updated below. */
6492 .nr_pages_max = PIPE_DEF_BUFFERS,
6493 .ops = &default_pipe_buf_ops,
6494 .spd_release = tracing_spd_release_pipe,
6495 };
6496 ssize_t ret;
6497 size_t rem;
6498 unsigned int i;
6499
6500 if (splice_grow_spd(pipe, &spd))
6501 return -ENOMEM;
6502
6503 mutex_lock(&iter->mutex);
6504
6505 if (iter->trace->splice_read) {
6506 ret = iter->trace->splice_read(iter, filp,
6507 ppos, pipe, len, flags);
6508 if (ret)
6509 goto out_err;
6510 }
6511
6512 ret = tracing_wait_pipe(filp);
6513 if (ret <= 0)
6514 goto out_err;
6515
6516 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6517 ret = -EFAULT;
6518 goto out_err;
6519 }
6520
6521 trace_event_read_lock();
6522 trace_access_lock(iter->cpu_file);
6523
6524 /* Fill as many pages as possible. */
6525 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6526 spd.pages[i] = alloc_page(GFP_KERNEL);
6527 if (!spd.pages[i])
6528 break;
6529
6530 rem = tracing_fill_pipe_page(rem, iter);
6531
6532 /* Copy the data into the page, so we can start over. */
6533 ret = trace_seq_to_buffer(&iter->seq,
6534 page_address(spd.pages[i]),
6535 trace_seq_used(&iter->seq));
6536 if (ret < 0) {
6537 __free_page(spd.pages[i]);
6538 break;
6539 }
6540 spd.partial[i].offset = 0;
6541 spd.partial[i].len = trace_seq_used(&iter->seq);
6542
6543 trace_seq_init(&iter->seq);
6544 }
6545
6546 trace_access_unlock(iter->cpu_file);
6547 trace_event_read_unlock();
6548 mutex_unlock(&iter->mutex);
6549
6550 spd.nr_pages = i;
6551
6552 if (i)
6553 ret = splice_to_pipe(pipe, &spd);
6554 else
6555 ret = 0;
6556 out:
6557 splice_shrink_spd(&spd);
6558 return ret;
6559
6560 out_err:
6561 mutex_unlock(&iter->mutex);
6562 goto out;
6563 }
6564
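/*
 * buffer_size_kb: reads report the per-CPU buffer size in KB, with
 * "(expanded: N)" appended while the boot-time minimal buffer has not
 * been expanded yet and "X" shown when the per-CPU sizes differ; writes
 * take a size in KB and resize via tracing_resize_ring_buffer().
 */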
6565 static ssize_t
6566 tracing_entries_read(struct file *filp, char __user *ubuf,
6567 size_t cnt, loff_t *ppos)
6568 {
6569 struct inode *inode = file_inode(filp);
6570 struct trace_array *tr = inode->i_private;
6571 int cpu = tracing_get_cpu(inode);
6572 char buf[64];
6573 int r = 0;
6574 ssize_t ret;
6575
6576 mutex_lock(&trace_types_lock);
6577
6578 if (cpu == RING_BUFFER_ALL_CPUS) {
6579 int cpu, buf_size_same;
6580 unsigned long size;
6581
6582 size = 0;
6583 buf_size_same = 1;
6584 /* check if all cpu sizes are same */
6585 for_each_tracing_cpu(cpu) {
6586 /* fill in the size from first enabled cpu */
6587 if (size == 0)
6588 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6589 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6590 buf_size_same = 0;
6591 break;
6592 }
6593 }
6594
6595 if (buf_size_same) {
6596 if (!ring_buffer_expanded)
6597 r = sprintf(buf, "%lu (expanded: %lu)\n",
6598 size >> 10,
6599 trace_buf_size >> 10);
6600 else
6601 r = sprintf(buf, "%lu\n", size >> 10);
6602 } else
6603 r = sprintf(buf, "X\n");
6604 } else
6605 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6606
6607 mutex_unlock(&trace_types_lock);
6608
6609 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6610 return ret;
6611 }
6612
6613 static ssize_t
6614 tracing_entries_write(struct file *filp, const char __user *ubuf,
6615 size_t cnt, loff_t *ppos)
6616 {
6617 struct inode *inode = file_inode(filp);
6618 struct trace_array *tr = inode->i_private;
6619 unsigned long val;
6620 int ret;
6621
6622 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6623 if (ret)
6624 return ret;
6625
6626 /* must have at least 1 entry */
6627 if (!val)
6628 return -EINVAL;
6629
6630 /* value is in KB */
6631 val <<= 10;
6632 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6633 if (ret < 0)
6634 return ret;
6635
6636 *ppos += cnt;
6637
6638 return cnt;
6639 }
6640
6641 static ssize_t
6642 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6643 size_t cnt, loff_t *ppos)
6644 {
6645 struct trace_array *tr = filp->private_data;
6646 char buf[64];
6647 int r, cpu;
6648 unsigned long size = 0, expanded_size = 0;
6649
6650 mutex_lock(&trace_types_lock);
6651 for_each_tracing_cpu(cpu) {
6652 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6653 if (!ring_buffer_expanded)
6654 expanded_size += trace_buf_size >> 10;
6655 }
6656 if (ring_buffer_expanded)
6657 r = sprintf(buf, "%lu\n", size);
6658 else
6659 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6660 mutex_unlock(&trace_types_lock);
6661
6662 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6663 }
6664
6665 static ssize_t
6666 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6667 size_t cnt, loff_t *ppos)
6668 {
6669 /*
6670 	 * There is no need to read what the user has written; this function
6671 	 * exists just to make sure that there is no error when "echo" is used.
6672 */
6673
6674 *ppos += cnt;
6675
6676 return cnt;
6677 }
6678
6679 static int
6680 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6681 {
6682 struct trace_array *tr = inode->i_private;
6683
6684 /* disable tracing ? */
6685 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6686 tracer_tracing_off(tr);
6687 /* resize the ring buffer to 0 */
6688 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6689
6690 trace_array_put(tr);
6691
6692 return 0;
6693 }
6694
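/*
 * trace_marker: text written here is recorded as a TRACE_PRINT event in
 * the ring buffer (capped at TRACE_BUF_SIZE). If the user page faults
 * during the atomic copy, the literal "<faulted>" string is logged
 * instead and -EFAULT is returned.
 */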
6695 static ssize_t
6696 tracing_mark_write(struct file *filp, const char __user *ubuf,
6697 size_t cnt, loff_t *fpos)
6698 {
6699 struct trace_array *tr = filp->private_data;
6700 struct ring_buffer_event *event;
6701 enum event_trigger_type tt = ETT_NONE;
6702 struct trace_buffer *buffer;
6703 struct print_entry *entry;
6704 unsigned long irq_flags;
6705 ssize_t written;
6706 int size;
6707 int len;
6708
6709 /* Used in tracing_mark_raw_write() as well */
6710 #define FAULTED_STR "<faulted>"
6711 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6712
6713 if (tracing_disabled)
6714 return -EINVAL;
6715
6716 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6717 return -EINVAL;
6718
6719 if (cnt > TRACE_BUF_SIZE)
6720 cnt = TRACE_BUF_SIZE;
6721
6722 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6723
6724 local_save_flags(irq_flags);
6725 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6726
6727 /* If less than "<faulted>", then make sure we can still add that */
6728 if (cnt < FAULTED_SIZE)
6729 size += FAULTED_SIZE - cnt;
6730
6731 buffer = tr->array_buffer.buffer;
6732 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6733 irq_flags, preempt_count());
6734 if (unlikely(!event))
6735 /* Ring buffer disabled, return as if not open for write */
6736 return -EBADF;
6737
6738 entry = ring_buffer_event_data(event);
6739 entry->ip = _THIS_IP_;
6740
6741 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6742 if (len) {
6743 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6744 cnt = FAULTED_SIZE;
6745 written = -EFAULT;
6746 } else
6747 written = cnt;
6748
6749 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6750 /* do not add \n before testing triggers, but add \0 */
6751 entry->buf[cnt] = '\0';
6752 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6753 }
6754
6755 if (entry->buf[cnt - 1] != '\n') {
6756 entry->buf[cnt] = '\n';
6757 entry->buf[cnt + 1] = '\0';
6758 } else
6759 entry->buf[cnt] = '\0';
6760
6761 if (static_branch_unlikely(&trace_marker_exports_enabled))
6762 ftrace_exports(event, TRACE_EXPORT_MARKER);
6763 __buffer_unlock_commit(buffer, event);
6764
6765 if (tt)
6766 event_triggers_post_call(tr->trace_marker_file, tt);
6767
6768 if (written > 0)
6769 *fpos += written;
6770
6771 return written;
6772 }
6773
6774 /* Limit it for now to 3K (including tag) */
6775 #define RAW_DATA_MAX_SIZE (1024*3)
6776
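/*
 * trace_marker_raw writes are binary: the first sizeof(int) bytes are
 * taken as a user-defined tag id (entry->id below) and the remainder is
 * stored verbatim as the payload, capped at RAW_DATA_MAX_SIZE.
 */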
6777 static ssize_t
6778 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6779 size_t cnt, loff_t *fpos)
6780 {
6781 struct trace_array *tr = filp->private_data;
6782 struct ring_buffer_event *event;
6783 struct trace_buffer *buffer;
6784 struct raw_data_entry *entry;
6785 unsigned long irq_flags;
6786 ssize_t written;
6787 int size;
6788 int len;
6789
6790 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6791
6792 if (tracing_disabled)
6793 return -EINVAL;
6794
6795 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6796 return -EINVAL;
6797
6798 /* The marker must at least have a tag id */
6799 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6800 return -EINVAL;
6801
6802 if (cnt > TRACE_BUF_SIZE)
6803 cnt = TRACE_BUF_SIZE;
6804
6805 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6806
6807 local_save_flags(irq_flags);
6808 size = sizeof(*entry) + cnt;
6809 if (cnt < FAULT_SIZE_ID)
6810 size += FAULT_SIZE_ID - cnt;
6811
6812 buffer = tr->array_buffer.buffer;
6813 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6814 irq_flags, preempt_count());
6815 if (!event)
6816 /* Ring buffer disabled, return as if not open for write */
6817 return -EBADF;
6818
6819 entry = ring_buffer_event_data(event);
6820
6821 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6822 if (len) {
6823 entry->id = -1;
6824 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6825 written = -EFAULT;
6826 } else
6827 written = cnt;
6828
6829 __buffer_unlock_commit(buffer, event);
6830
6831 if (written > 0)
6832 *fpos += written;
6833
6834 return written;
6835 }
6836
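/*
 * The trace_clock file: reading it lists the available clocks with the
 * currently selected one in brackets (e.g. "[local] global ..."; the
 * names here are illustrative, the real list is trace_clocks[]); writing
 * a clock name switches the buffer clock and resets the buffers, since
 * timestamps taken with different clocks are not comparable.
 */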
6837 static int tracing_clock_show(struct seq_file *m, void *v)
6838 {
6839 struct trace_array *tr = m->private;
6840 int i;
6841
6842 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6843 seq_printf(m,
6844 "%s%s%s%s", i ? " " : "",
6845 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6846 i == tr->clock_id ? "]" : "");
6847 seq_putc(m, '\n');
6848
6849 return 0;
6850 }
6851
6852 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6853 {
6854 int i;
6855
6856 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6857 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6858 break;
6859 }
6860 if (i == ARRAY_SIZE(trace_clocks))
6861 return -EINVAL;
6862
6863 mutex_lock(&trace_types_lock);
6864
6865 tr->clock_id = i;
6866
6867 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6868
6869 /*
6870 * New clock may not be consistent with the previous clock.
6871 * Reset the buffer so that it doesn't have incomparable timestamps.
6872 */
6873 tracing_reset_online_cpus(&tr->array_buffer);
6874
6875 #ifdef CONFIG_TRACER_MAX_TRACE
6876 if (tr->max_buffer.buffer)
6877 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6878 tracing_reset_online_cpus(&tr->max_buffer);
6879 #endif
6880
6881 mutex_unlock(&trace_types_lock);
6882
6883 return 0;
6884 }
6885
6886 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6887 size_t cnt, loff_t *fpos)
6888 {
6889 struct seq_file *m = filp->private_data;
6890 struct trace_array *tr = m->private;
6891 char buf[64];
6892 const char *clockstr;
6893 int ret;
6894
6895 if (cnt >= sizeof(buf))
6896 return -EINVAL;
6897
6898 if (copy_from_user(buf, ubuf, cnt))
6899 return -EFAULT;
6900
6901 buf[cnt] = 0;
6902
6903 clockstr = strstrip(buf);
6904
6905 ret = tracing_set_clock(tr, clockstr);
6906 if (ret)
6907 return ret;
6908
6909 *fpos += cnt;
6910
6911 return cnt;
6912 }
6913
6914 static int tracing_clock_open(struct inode *inode, struct file *file)
6915 {
6916 struct trace_array *tr = inode->i_private;
6917 int ret;
6918
6919 ret = tracing_check_open_get_tr(tr);
6920 if (ret)
6921 return ret;
6922
6923 ret = single_open(file, tracing_clock_show, inode->i_private);
6924 if (ret < 0)
6925 trace_array_put(tr);
6926
6927 return ret;
6928 }
6929
6930 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6931 {
6932 struct trace_array *tr = m->private;
6933
6934 mutex_lock(&trace_types_lock);
6935
6936 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6937 seq_puts(m, "delta [absolute]\n");
6938 else
6939 seq_puts(m, "[delta] absolute\n");
6940
6941 mutex_unlock(&trace_types_lock);
6942
6943 return 0;
6944 }
6945
6946 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6947 {
6948 struct trace_array *tr = inode->i_private;
6949 int ret;
6950
6951 ret = tracing_check_open_get_tr(tr);
6952 if (ret)
6953 return ret;
6954
6955 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6956 if (ret < 0)
6957 trace_array_put(tr);
6958
6959 return ret;
6960 }
6961
6962 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6963 {
6964 int ret = 0;
6965
6966 mutex_lock(&trace_types_lock);
6967
6968 if (abs && tr->time_stamp_abs_ref++)
6969 goto out;
6970
6971 if (!abs) {
6972 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6973 ret = -EINVAL;
6974 goto out;
6975 }
6976
6977 if (--tr->time_stamp_abs_ref)
6978 goto out;
6979 }
6980
6981 ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6982
6983 #ifdef CONFIG_TRACER_MAX_TRACE
6984 if (tr->max_buffer.buffer)
6985 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6986 #endif
6987 out:
6988 mutex_unlock(&trace_types_lock);
6989
6990 return ret;
6991 }
6992
6993 struct ftrace_buffer_info {
6994 struct trace_iterator iter;
6995 void *spare;
6996 unsigned int spare_cpu;
6997 unsigned int read;
6998 };
6999
7000 #ifdef CONFIG_TRACER_SNAPSHOT
7001 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7002 {
7003 struct trace_array *tr = inode->i_private;
7004 struct trace_iterator *iter;
7005 struct seq_file *m;
7006 int ret;
7007
7008 ret = tracing_check_open_get_tr(tr);
7009 if (ret)
7010 return ret;
7011
7012 if (file->f_mode & FMODE_READ) {
7013 iter = __tracing_open(inode, file, true);
7014 if (IS_ERR(iter))
7015 ret = PTR_ERR(iter);
7016 } else {
7017 /* Writes still need the seq_file to hold the private data */
7018 ret = -ENOMEM;
7019 m = kzalloc(sizeof(*m), GFP_KERNEL);
7020 if (!m)
7021 goto out;
7022 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7023 if (!iter) {
7024 kfree(m);
7025 goto out;
7026 }
7027 ret = 0;
7028
7029 iter->tr = tr;
7030 iter->array_buffer = &tr->max_buffer;
7031 iter->cpu_file = tracing_get_cpu(inode);
7032 m->private = iter;
7033 file->private_data = m;
7034 }
7035 out:
7036 if (ret < 0)
7037 trace_array_put(tr);
7038
7039 return ret;
7040 }
7041
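/*
 * Writes to the snapshot file are dispatched on the value written (see
 * the switch below): 0 frees the allocated snapshot buffer (all-CPUs
 * file only), 1 allocates it if needed and swaps in the current trace
 * (i.e. takes a snapshot), and any other value clears the snapshot
 * buffer contents while leaving it allocated.
 */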
7042 static ssize_t
7043 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7044 loff_t *ppos)
7045 {
7046 struct seq_file *m = filp->private_data;
7047 struct trace_iterator *iter = m->private;
7048 struct trace_array *tr = iter->tr;
7049 unsigned long val;
7050 int ret;
7051
7052 ret = tracing_update_buffers();
7053 if (ret < 0)
7054 return ret;
7055
7056 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7057 if (ret)
7058 return ret;
7059
7060 mutex_lock(&trace_types_lock);
7061
7062 if (tr->current_trace->use_max_tr) {
7063 ret = -EBUSY;
7064 goto out;
7065 }
7066
7067 local_irq_disable();
7068 arch_spin_lock(&tr->max_lock);
7069 if (tr->cond_snapshot)
7070 ret = -EBUSY;
7071 arch_spin_unlock(&tr->max_lock);
7072 local_irq_enable();
7073 if (ret)
7074 goto out;
7075
7076 switch (val) {
7077 case 0:
7078 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7079 ret = -EINVAL;
7080 break;
7081 }
7082 if (tr->allocated_snapshot)
7083 free_snapshot(tr);
7084 break;
7085 case 1:
7086 /* Only allow per-cpu swap if the ring buffer supports it */
7087 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7088 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7089 ret = -EINVAL;
7090 break;
7091 }
7092 #endif
7093 if (tr->allocated_snapshot)
7094 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7095 &tr->array_buffer, iter->cpu_file);
7096 else
7097 ret = tracing_alloc_snapshot_instance(tr);
7098 if (ret < 0)
7099 break;
7100 local_irq_disable();
7101 /* Now, we're going to swap */
7102 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7103 update_max_tr(tr, current, smp_processor_id(), NULL);
7104 else
7105 update_max_tr_single(tr, current, iter->cpu_file);
7106 local_irq_enable();
7107 break;
7108 default:
7109 if (tr->allocated_snapshot) {
7110 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7111 tracing_reset_online_cpus(&tr->max_buffer);
7112 else
7113 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7114 }
7115 break;
7116 }
7117
7118 if (ret >= 0) {
7119 *ppos += cnt;
7120 ret = cnt;
7121 }
7122 out:
7123 mutex_unlock(&trace_types_lock);
7124 return ret;
7125 }
7126
7127 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7128 {
7129 struct seq_file *m = file->private_data;
7130 int ret;
7131
7132 ret = tracing_release(inode, file);
7133
7134 if (file->f_mode & FMODE_READ)
7135 return ret;
7136
7137 /* If write only, the seq_file is just a stub */
7138 if (m)
7139 kfree(m->private);
7140 kfree(m);
7141
7142 return 0;
7143 }
7144
7145 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7146 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7147 size_t count, loff_t *ppos);
7148 static int tracing_buffers_release(struct inode *inode, struct file *file);
7149 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7150 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7151
7152 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7153 {
7154 struct ftrace_buffer_info *info;
7155 int ret;
7156
7157 /* The following checks for tracefs lockdown */
7158 ret = tracing_buffers_open(inode, filp);
7159 if (ret < 0)
7160 return ret;
7161
7162 info = filp->private_data;
7163
7164 if (info->iter.trace->use_max_tr) {
7165 tracing_buffers_release(inode, filp);
7166 return -EBUSY;
7167 }
7168
7169 info->iter.snapshot = true;
7170 info->iter.array_buffer = &info->iter.tr->max_buffer;
7171
7172 return ret;
7173 }
7174
7175 #endif /* CONFIG_TRACER_SNAPSHOT */
7176
7177
7178 static const struct file_operations tracing_thresh_fops = {
7179 .open = tracing_open_generic,
7180 .read = tracing_thresh_read,
7181 .write = tracing_thresh_write,
7182 .llseek = generic_file_llseek,
7183 };
7184
7185 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7186 static const struct file_operations tracing_max_lat_fops = {
7187 .open = tracing_open_generic,
7188 .read = tracing_max_lat_read,
7189 .write = tracing_max_lat_write,
7190 .llseek = generic_file_llseek,
7191 };
7192 #endif
7193
7194 static const struct file_operations set_tracer_fops = {
7195 .open = tracing_open_generic,
7196 .read = tracing_set_trace_read,
7197 .write = tracing_set_trace_write,
7198 .llseek = generic_file_llseek,
7199 };
7200
7201 static const struct file_operations tracing_pipe_fops = {
7202 .open = tracing_open_pipe,
7203 .poll = tracing_poll_pipe,
7204 .read = tracing_read_pipe,
7205 .splice_read = tracing_splice_read_pipe,
7206 .release = tracing_release_pipe,
7207 .llseek = no_llseek,
7208 };
7209
7210 static const struct file_operations tracing_entries_fops = {
7211 .open = tracing_open_generic_tr,
7212 .read = tracing_entries_read,
7213 .write = tracing_entries_write,
7214 .llseek = generic_file_llseek,
7215 .release = tracing_release_generic_tr,
7216 };
7217
7218 static const struct file_operations tracing_total_entries_fops = {
7219 .open = tracing_open_generic_tr,
7220 .read = tracing_total_entries_read,
7221 .llseek = generic_file_llseek,
7222 .release = tracing_release_generic_tr,
7223 };
7224
7225 static const struct file_operations tracing_free_buffer_fops = {
7226 .open = tracing_open_generic_tr,
7227 .write = tracing_free_buffer_write,
7228 .release = tracing_free_buffer_release,
7229 };
7230
7231 static const struct file_operations tracing_mark_fops = {
7232 .open = tracing_open_generic_tr,
7233 .write = tracing_mark_write,
7234 .llseek = generic_file_llseek,
7235 .release = tracing_release_generic_tr,
7236 };
7237
7238 static const struct file_operations tracing_mark_raw_fops = {
7239 .open = tracing_open_generic_tr,
7240 .write = tracing_mark_raw_write,
7241 .llseek = generic_file_llseek,
7242 .release = tracing_release_generic_tr,
7243 };
7244
7245 static const struct file_operations trace_clock_fops = {
7246 .open = tracing_clock_open,
7247 .read = seq_read,
7248 .llseek = seq_lseek,
7249 .release = tracing_single_release_tr,
7250 .write = tracing_clock_write,
7251 };
7252
7253 static const struct file_operations trace_time_stamp_mode_fops = {
7254 .open = tracing_time_stamp_mode_open,
7255 .read = seq_read,
7256 .llseek = seq_lseek,
7257 .release = tracing_single_release_tr,
7258 };
7259
7260 #ifdef CONFIG_TRACER_SNAPSHOT
7261 static const struct file_operations snapshot_fops = {
7262 .open = tracing_snapshot_open,
7263 .read = seq_read,
7264 .write = tracing_snapshot_write,
7265 .llseek = tracing_lseek,
7266 .release = tracing_snapshot_release,
7267 };
7268
7269 static const struct file_operations snapshot_raw_fops = {
7270 .open = snapshot_raw_open,
7271 .read = tracing_buffers_read,
7272 .release = tracing_buffers_release,
7273 .splice_read = tracing_buffers_splice_read,
7274 .llseek = no_llseek,
7275 };
7276
7277 #endif /* CONFIG_TRACER_SNAPSHOT */
7278
7279 #define TRACING_LOG_ERRS_MAX 8
7280 #define TRACING_LOG_LOC_MAX 128
7281
7282 #define CMD_PREFIX " Command: "
7283
7284 struct err_info {
7285 const char **errs; /* ptr to loc-specific array of err strings */
7286 u8 type; /* index into errs -> specific err string */
7287 	u8		pos;	/* caret position within cmd; < MAX_FILTER_STR_VAL (256) */
7288 u64 ts;
7289 };
7290
7291 struct tracing_log_err {
7292 struct list_head list;
7293 struct err_info info;
7294 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7295 char cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7296 };
7297
7298 static DEFINE_MUTEX(tracing_err_log_lock);
7299
7300 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7301 {
7302 struct tracing_log_err *err;
7303
7304 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7305 err = kzalloc(sizeof(*err), GFP_KERNEL);
7306 if (!err)
7307 err = ERR_PTR(-ENOMEM);
7308 else
7309 tr->n_err_log_entries++;
7310
7311 return err;
7312 }
7313
7314 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7315 list_del(&err->list);
7316
7317 return err;
7318 }
7319
7320 /**
7321 * err_pos - find the position of a string within a command for error careting
7322 * @cmd: The tracing command that caused the error
7323 * @str: The string to position the caret at within @cmd
7324 *
7325  * Finds the position of the first occurrence of @str within @cmd. The
7326 * return value can be passed to tracing_log_err() for caret placement
7327 * within @cmd.
7328 *
7329  * Returns the index within @cmd of the first occurrence of @str or 0
7330 * if @str was not found.
7331 */
7332 unsigned int err_pos(char *cmd, const char *str)
7333 {
7334 char *found;
7335
7336 if (WARN_ON(!strlen(cmd)))
7337 return 0;
7338
7339 found = strstr(cmd, str);
7340 if (found)
7341 return found - cmd;
7342
7343 return 0;
7344 }
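/*
 * Editor's sketch (not from the original source): a caller reporting a bad
 * token in a command would typically combine err_pos() with tracing_log_err()
 * roughly like this, where "hist", hist_errs and ERR_BAD_TOKEN are
 * hypothetical names used only for illustration:
 *
 *	tracing_log_err(tr, "hist", cmd, hist_errs, ERR_BAD_TOKEN,
 *			err_pos(cmd, bad_token));
 */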
7345
7346 /**
7347 * tracing_log_err - write an error to the tracing error log
7348 * @tr: The associated trace array for the error (NULL for top level array)
7349 * @loc: A string describing where the error occurred
7350 * @cmd: The tracing command that caused the error
7351 * @errs: The array of loc-specific static error strings
7352 * @type: The index into errs[], which produces the specific static err string
7353 * @pos: The position the caret should be placed in the cmd
7354 *
7355 * Writes an error into tracing/error_log of the form:
7356 *
7357 * <loc>: error: <text>
7358 * Command: <cmd>
7359 * ^
7360 *
7361 * tracing/error_log is a small log file containing the last
7362 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7363 * unless there has been a tracing error, and the error log can be
7364 * cleared and have its memory freed by writing the empty string in
7365 * truncation mode to it i.e. echo > tracing/error_log.
7366 *
7367 * NOTE: the @errs array along with the @type param are used to
7368 * produce a static error string - this string is not copied and saved
7369 * when the error is logged - only a pointer to it is saved. See
7370 * existing callers for examples of how static strings are typically
7371 * defined for use with tracing_log_err().
7372 */
7373 void tracing_log_err(struct trace_array *tr,
7374 const char *loc, const char *cmd,
7375 const char **errs, u8 type, u8 pos)
7376 {
7377 struct tracing_log_err *err;
7378
7379 if (!tr)
7380 tr = &global_trace;
7381
7382 mutex_lock(&tracing_err_log_lock);
7383 err = get_tracing_log_err(tr);
7384 if (PTR_ERR(err) == -ENOMEM) {
7385 mutex_unlock(&tracing_err_log_lock);
7386 return;
7387 }
7388
7389 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7390 snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7391
7392 err->info.errs = errs;
7393 err->info.type = type;
7394 err->info.pos = pos;
7395 err->info.ts = local_clock();
7396
7397 list_add_tail(&err->list, &tr->err_log);
7398 mutex_unlock(&tracing_err_log_lock);
7399 }
7400
7401 static void clear_tracing_err_log(struct trace_array *tr)
7402 {
7403 struct tracing_log_err *err, *next;
7404
7405 mutex_lock(&tracing_err_log_lock);
7406 list_for_each_entry_safe(err, next, &tr->err_log, list) {
7407 list_del(&err->list);
7408 kfree(err);
7409 }
7410
7411 tr->n_err_log_entries = 0;
7412 mutex_unlock(&tracing_err_log_lock);
7413 }
7414
7415 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7416 {
7417 struct trace_array *tr = m->private;
7418
7419 mutex_lock(&tracing_err_log_lock);
7420
7421 return seq_list_start(&tr->err_log, *pos);
7422 }
7423
7424 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7425 {
7426 struct trace_array *tr = m->private;
7427
7428 return seq_list_next(v, &tr->err_log, pos);
7429 }
7430
7431 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7432 {
7433 mutex_unlock(&tracing_err_log_lock);
7434 }
7435
7436 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7437 {
7438 u8 i;
7439
7440 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7441 seq_putc(m, ' ');
7442 for (i = 0; i < pos; i++)
7443 seq_putc(m, ' ');
7444 seq_puts(m, "^\n");
7445 }
7446
7447 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7448 {
7449 struct tracing_log_err *err = v;
7450
7451 if (err) {
7452 const char *err_text = err->info.errs[err->info.type];
7453 u64 sec = err->info.ts;
7454 u32 nsec;
7455
7456 nsec = do_div(sec, NSEC_PER_SEC);
7457 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7458 err->loc, err_text);
7459 seq_printf(m, "%s", err->cmd);
7460 tracing_err_log_show_pos(m, err->info.pos);
7461 }
7462
7463 return 0;
7464 }
7465
7466 static const struct seq_operations tracing_err_log_seq_ops = {
7467 .start = tracing_err_log_seq_start,
7468 .next = tracing_err_log_seq_next,
7469 .stop = tracing_err_log_seq_stop,
7470 .show = tracing_err_log_seq_show
7471 };
7472
7473 static int tracing_err_log_open(struct inode *inode, struct file *file)
7474 {
7475 struct trace_array *tr = inode->i_private;
7476 int ret = 0;
7477
7478 ret = tracing_check_open_get_tr(tr);
7479 if (ret)
7480 return ret;
7481
7482 /* If this file was opened for write, then erase contents */
7483 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7484 clear_tracing_err_log(tr);
7485
7486 if (file->f_mode & FMODE_READ) {
7487 ret = seq_open(file, &tracing_err_log_seq_ops);
7488 if (!ret) {
7489 struct seq_file *m = file->private_data;
7490 m->private = tr;
7491 } else {
7492 trace_array_put(tr);
7493 }
7494 }
7495 return ret;
7496 }
7497
7498 static ssize_t tracing_err_log_write(struct file *file,
7499 const char __user *buffer,
7500 size_t count, loff_t *ppos)
7501 {
7502 return count;
7503 }
7504
7505 static int tracing_err_log_release(struct inode *inode, struct file *file)
7506 {
7507 struct trace_array *tr = inode->i_private;
7508
7509 trace_array_put(tr);
7510
7511 if (file->f_mode & FMODE_READ)
7512 seq_release(inode, file);
7513
7514 return 0;
7515 }
7516
7517 static const struct file_operations tracing_err_log_fops = {
7518 .open = tracing_err_log_open,
7519 .write = tracing_err_log_write,
7520 .read = seq_read,
7521 .llseek = seq_lseek,
7522 .release = tracing_err_log_release,
7523 };
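/*
 * Editor's note - a sketch of error_log usage from user space (the exact
 * message depends on the command that failed; this output is illustrative):
 *
 *	# cat /sys/kernel/tracing/error_log
 *	[  136.275382] hist:sched:sched_switch: error: Couldn't find field
 *	  Command: keys=nonexistent_field
 *	                ^
 *	# echo > /sys/kernel/tracing/error_log		(clear the log)
 */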
7524
7525 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7526 {
7527 struct trace_array *tr = inode->i_private;
7528 struct ftrace_buffer_info *info;
7529 int ret;
7530
7531 ret = tracing_check_open_get_tr(tr);
7532 if (ret)
7533 return ret;
7534
7535 info = kvzalloc(sizeof(*info), GFP_KERNEL);
7536 if (!info) {
7537 trace_array_put(tr);
7538 return -ENOMEM;
7539 }
7540
7541 mutex_lock(&trace_types_lock);
7542
7543 info->iter.tr = tr;
7544 info->iter.cpu_file = tracing_get_cpu(inode);
7545 info->iter.trace = tr->current_trace;
7546 info->iter.array_buffer = &tr->array_buffer;
7547 info->spare = NULL;
7548 /* Force reading ring buffer for first read */
7549 info->read = (unsigned int)-1;
7550
7551 filp->private_data = info;
7552
7553 tr->trace_ref++;
7554
7555 mutex_unlock(&trace_types_lock);
7556
7557 ret = nonseekable_open(inode, filp);
7558 if (ret < 0)
7559 trace_array_put(tr);
7560
7561 return ret;
7562 }
7563
7564 static __poll_t
7565 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7566 {
7567 struct ftrace_buffer_info *info = filp->private_data;
7568 struct trace_iterator *iter = &info->iter;
7569
7570 return trace_poll(iter, filp, poll_table);
7571 }
7572
7573 static ssize_t
7574 tracing_buffers_read(struct file *filp, char __user *ubuf,
7575 size_t count, loff_t *ppos)
7576 {
7577 struct ftrace_buffer_info *info = filp->private_data;
7578 struct trace_iterator *iter = &info->iter;
7579 ssize_t ret = 0;
7580 ssize_t size;
7581
7582 if (!count)
7583 return 0;
7584
7585 #ifdef CONFIG_TRACER_MAX_TRACE
7586 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7587 return -EBUSY;
7588 #endif
7589
7590 if (!info->spare) {
7591 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7592 iter->cpu_file);
7593 if (IS_ERR(info->spare)) {
7594 ret = PTR_ERR(info->spare);
7595 info->spare = NULL;
7596 } else {
7597 info->spare_cpu = iter->cpu_file;
7598 }
7599 }
7600 if (!info->spare)
7601 return ret;
7602
7603 /* Do we have previous read data to read? */
7604 if (info->read < PAGE_SIZE)
7605 goto read;
7606
7607 again:
7608 trace_access_lock(iter->cpu_file);
7609 ret = ring_buffer_read_page(iter->array_buffer->buffer,
7610 &info->spare,
7611 count,
7612 iter->cpu_file, 0);
7613 trace_access_unlock(iter->cpu_file);
7614
7615 if (ret < 0) {
7616 if (trace_empty(iter)) {
7617 if ((filp->f_flags & O_NONBLOCK))
7618 return -EAGAIN;
7619
7620 ret = wait_on_pipe(iter, 0);
7621 if (ret)
7622 return ret;
7623
7624 goto again;
7625 }
7626 return 0;
7627 }
7628
7629 info->read = 0;
7630 read:
7631 size = PAGE_SIZE - info->read;
7632 if (size > count)
7633 size = count;
7634
7635 ret = copy_to_user(ubuf, info->spare + info->read, size);
7636 if (ret == size)
7637 return -EFAULT;
7638
7639 size -= ret;
7640
7641 *ppos += size;
7642 info->read += size;
7643
7644 return size;
7645 }
7646
7647 static int tracing_buffers_release(struct inode *inode, struct file *file)
7648 {
7649 struct ftrace_buffer_info *info = file->private_data;
7650 struct trace_iterator *iter = &info->iter;
7651
7652 mutex_lock(&trace_types_lock);
7653
7654 iter->tr->trace_ref--;
7655
7656 __trace_array_put(iter->tr);
7657
7658 if (info->spare)
7659 ring_buffer_free_read_page(iter->array_buffer->buffer,
7660 info->spare_cpu, info->spare);
7661 kvfree(info);
7662
7663 mutex_unlock(&trace_types_lock);
7664
7665 return 0;
7666 }
7667
7668 struct buffer_ref {
7669 struct trace_buffer *buffer;
7670 void *page;
7671 int cpu;
7672 refcount_t refcount;
7673 };
7674
7675 static void buffer_ref_release(struct buffer_ref *ref)
7676 {
7677 if (!refcount_dec_and_test(&ref->refcount))
7678 return;
7679 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7680 kfree(ref);
7681 }
7682
7683 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7684 struct pipe_buffer *buf)
7685 {
7686 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7687
7688 buffer_ref_release(ref);
7689 buf->private = 0;
7690 }
7691
7692 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7693 struct pipe_buffer *buf)
7694 {
7695 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7696
7697 if (refcount_read(&ref->refcount) > INT_MAX/2)
7698 return false;
7699
7700 refcount_inc(&ref->refcount);
7701 return true;
7702 }
7703
7704 /* Pipe buffer operations for a buffer. */
7705 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7706 .release = buffer_pipe_buf_release,
7707 .get = buffer_pipe_buf_get,
7708 };
7709
7710 /*
7711 * Callback from splice_to_pipe(), if we need to release some pages
7712 * at the end of the spd in case we error'ed out in filling the pipe.
7713 */
7714 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7715 {
7716 struct buffer_ref *ref =
7717 (struct buffer_ref *)spd->partial[i].private;
7718
7719 buffer_ref_release(ref);
7720 spd->partial[i].private = 0;
7721 }
7722
7723 static ssize_t
7724 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7725 struct pipe_inode_info *pipe, size_t len,
7726 unsigned int flags)
7727 {
7728 struct ftrace_buffer_info *info = file->private_data;
7729 struct trace_iterator *iter = &info->iter;
7730 struct partial_page partial_def[PIPE_DEF_BUFFERS];
7731 struct page *pages_def[PIPE_DEF_BUFFERS];
7732 struct splice_pipe_desc spd = {
7733 .pages = pages_def,
7734 .partial = partial_def,
7735 .nr_pages_max = PIPE_DEF_BUFFERS,
7736 .ops = &buffer_pipe_buf_ops,
7737 .spd_release = buffer_spd_release,
7738 };
7739 struct buffer_ref *ref;
7740 int entries, i;
7741 ssize_t ret = 0;
7742
7743 #ifdef CONFIG_TRACER_MAX_TRACE
7744 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7745 return -EBUSY;
7746 #endif
7747
7748 if (*ppos & (PAGE_SIZE - 1))
7749 return -EINVAL;
7750
7751 if (len & (PAGE_SIZE - 1)) {
7752 if (len < PAGE_SIZE)
7753 return -EINVAL;
7754 len &= PAGE_MASK;
7755 }
7756
7757 if (splice_grow_spd(pipe, &spd))
7758 return -ENOMEM;
7759
7760 again:
7761 trace_access_lock(iter->cpu_file);
7762 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7763
7764 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7765 struct page *page;
7766 int r;
7767
7768 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7769 if (!ref) {
7770 ret = -ENOMEM;
7771 break;
7772 }
7773
7774 refcount_set(&ref->refcount, 1);
7775 ref->buffer = iter->array_buffer->buffer;
7776 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7777 if (IS_ERR(ref->page)) {
7778 ret = PTR_ERR(ref->page);
7779 ref->page = NULL;
7780 kfree(ref);
7781 break;
7782 }
7783 ref->cpu = iter->cpu_file;
7784
7785 r = ring_buffer_read_page(ref->buffer, &ref->page,
7786 len, iter->cpu_file, 1);
7787 if (r < 0) {
7788 ring_buffer_free_read_page(ref->buffer, ref->cpu,
7789 ref->page);
7790 kfree(ref);
7791 break;
7792 }
7793
7794 page = virt_to_page(ref->page);
7795
7796 spd.pages[i] = page;
7797 spd.partial[i].len = PAGE_SIZE;
7798 spd.partial[i].offset = 0;
7799 spd.partial[i].private = (unsigned long)ref;
7800 spd.nr_pages++;
7801 *ppos += PAGE_SIZE;
7802
7803 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7804 }
7805
7806 trace_access_unlock(iter->cpu_file);
7807 spd.nr_pages = i;
7808
7809 /* did we read anything? */
7810 if (!spd.nr_pages) {
7811 if (ret)
7812 goto out;
7813
7814 ret = -EAGAIN;
7815 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7816 goto out;
7817
7818 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7819 if (ret)
7820 goto out;
7821
7822 goto again;
7823 }
7824
7825 ret = splice_to_pipe(pipe, &spd);
7826 out:
7827 splice_shrink_spd(&spd);
7828
7829 return ret;
7830 }
7831
7832 static const struct file_operations tracing_buffers_fops = {
7833 .open = tracing_buffers_open,
7834 .read = tracing_buffers_read,
7835 .poll = tracing_buffers_poll,
7836 .release = tracing_buffers_release,
7837 .splice_read = tracing_buffers_splice_read,
7838 .llseek = no_llseek,
7839 };
7840
7841 static ssize_t
7842 tracing_stats_read(struct file *filp, char __user *ubuf,
7843 size_t count, loff_t *ppos)
7844 {
7845 struct inode *inode = file_inode(filp);
7846 struct trace_array *tr = inode->i_private;
7847 struct array_buffer *trace_buf = &tr->array_buffer;
7848 int cpu = tracing_get_cpu(inode);
7849 struct trace_seq *s;
7850 unsigned long cnt;
7851 unsigned long long t;
7852 unsigned long usec_rem;
7853
7854 s = kmalloc(sizeof(*s), GFP_KERNEL);
7855 if (!s)
7856 return -ENOMEM;
7857
7858 trace_seq_init(s);
7859
7860 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7861 trace_seq_printf(s, "entries: %ld\n", cnt);
7862
7863 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7864 trace_seq_printf(s, "overrun: %ld\n", cnt);
7865
7866 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7867 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7868
7869 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7870 trace_seq_printf(s, "bytes: %ld\n", cnt);
7871
7872 if (trace_clocks[tr->clock_id].in_ns) {
7873 /* local or global for trace_clock */
7874 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7875 usec_rem = do_div(t, USEC_PER_SEC);
7876 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7877 t, usec_rem);
7878
7879 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7880 usec_rem = do_div(t, USEC_PER_SEC);
7881 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7882 } else {
7883 /* counter or tsc mode for trace_clock */
7884 trace_seq_printf(s, "oldest event ts: %llu\n",
7885 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7886
7887 trace_seq_printf(s, "now ts: %llu\n",
7888 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7889 }
7890
7891 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7892 trace_seq_printf(s, "dropped events: %ld\n", cnt);
7893
7894 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7895 trace_seq_printf(s, "read events: %ld\n", cnt);
7896
7897 count = simple_read_from_buffer(ubuf, count, ppos,
7898 s->buffer, trace_seq_used(s));
7899
7900 kfree(s);
7901
7902 return count;
7903 }
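/*
 * Editor's note - tracing_stats_read() above produces one "name: value" pair
 * per line; an illustrative (made up) read of per_cpu/cpu0/stats:
 *
 *	entries: 129
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 6724
 *	oldest event ts:  2296.622858
 *	now ts:  2312.537089
 *	dropped events: 0
 *	read events: 129
 */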
7904
7905 static const struct file_operations tracing_stats_fops = {
7906 .open = tracing_open_generic_tr,
7907 .read = tracing_stats_read,
7908 .llseek = generic_file_llseek,
7909 .release = tracing_release_generic_tr,
7910 };
7911
7912 #ifdef CONFIG_DYNAMIC_FTRACE
7913
7914 static ssize_t
7915 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7916 size_t cnt, loff_t *ppos)
7917 {
7918 ssize_t ret;
7919 char *buf;
7920 int r;
7921
7922 /* 256 should be plenty to hold the amount needed */
7923 buf = kmalloc(256, GFP_KERNEL);
7924 if (!buf)
7925 return -ENOMEM;
7926
7927 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7928 ftrace_update_tot_cnt,
7929 ftrace_number_of_pages,
7930 ftrace_number_of_groups);
7931
7932 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7933 kfree(buf);
7934 return ret;
7935 }
7936
7937 static const struct file_operations tracing_dyn_info_fops = {
7938 .open = tracing_open_generic,
7939 .read = tracing_read_dyn_info,
7940 .llseek = generic_file_llseek,
7941 };
7942 #endif /* CONFIG_DYNAMIC_FTRACE */
7943
7944 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7945 static void
7946 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7947 struct trace_array *tr, struct ftrace_probe_ops *ops,
7948 void *data)
7949 {
7950 tracing_snapshot_instance(tr);
7951 }
7952
7953 static void
7954 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7955 struct trace_array *tr, struct ftrace_probe_ops *ops,
7956 void *data)
7957 {
7958 struct ftrace_func_mapper *mapper = data;
7959 long *count = NULL;
7960
7961 if (mapper)
7962 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7963
7964 if (count) {
7965
7966 if (*count <= 0)
7967 return;
7968
7969 (*count)--;
7970 }
7971
7972 tracing_snapshot_instance(tr);
7973 }
7974
7975 static int
7976 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7977 struct ftrace_probe_ops *ops, void *data)
7978 {
7979 struct ftrace_func_mapper *mapper = data;
7980 long *count = NULL;
7981
7982 seq_printf(m, "%ps:", (void *)ip);
7983
7984 seq_puts(m, "snapshot");
7985
7986 if (mapper)
7987 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7988
7989 if (count)
7990 seq_printf(m, ":count=%ld\n", *count);
7991 else
7992 seq_puts(m, ":unlimited\n");
7993
7994 return 0;
7995 }
7996
7997 static int
7998 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7999 unsigned long ip, void *init_data, void **data)
8000 {
8001 struct ftrace_func_mapper *mapper = *data;
8002
8003 if (!mapper) {
8004 mapper = allocate_ftrace_func_mapper();
8005 if (!mapper)
8006 return -ENOMEM;
8007 *data = mapper;
8008 }
8009
8010 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8011 }
8012
8013 static void
8014 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8015 unsigned long ip, void *data)
8016 {
8017 struct ftrace_func_mapper *mapper = data;
8018
8019 if (!ip) {
8020 if (!mapper)
8021 return;
8022 free_ftrace_func_mapper(mapper, NULL);
8023 return;
8024 }
8025
8026 ftrace_func_mapper_remove_ip(mapper, ip);
8027 }
8028
8029 static struct ftrace_probe_ops snapshot_probe_ops = {
8030 .func = ftrace_snapshot,
8031 .print = ftrace_snapshot_print,
8032 };
8033
8034 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8035 .func = ftrace_count_snapshot,
8036 .print = ftrace_snapshot_print,
8037 .init = ftrace_snapshot_init,
8038 .free = ftrace_snapshot_free,
8039 };
8040
8041 static int
8042 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8043 char *glob, char *cmd, char *param, int enable)
8044 {
8045 struct ftrace_probe_ops *ops;
8046 void *count = (void *)-1;
8047 char *number;
8048 int ret;
8049
8050 if (!tr)
8051 return -ENODEV;
8052
8053 /* hash funcs only work with set_ftrace_filter */
8054 if (!enable)
8055 return -EINVAL;
8056
8057 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8058
8059 if (glob[0] == '!')
8060 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8061
8062 if (!param)
8063 goto out_reg;
8064
8065 number = strsep(&param, ":");
8066
8067 if (!strlen(number))
8068 goto out_reg;
8069
8070 /*
8071 * We use the callback data field (which is a pointer)
8072 * as our counter.
8073 */
8074 ret = kstrtoul(number, 0, (unsigned long *)&count);
8075 if (ret)
8076 return ret;
8077
8078 out_reg:
8079 ret = tracing_alloc_snapshot_instance(tr);
8080 if (ret < 0)
8081 goto out;
8082
8083 ret = register_ftrace_function_probe(glob, tr, ops, count);
8084
8085 out:
8086 return ret < 0 ? ret : 0;
8087 }
8088
8089 static struct ftrace_func_command ftrace_snapshot_cmd = {
8090 .name = "snapshot",
8091 .func = ftrace_trace_snapshot_callback,
8092 };
8093
8094 static __init int register_snapshot_cmd(void)
8095 {
8096 return register_ftrace_command(&ftrace_snapshot_cmd);
8097 }
8098 #else
8099 static inline __init int register_snapshot_cmd(void) { return 0; }
8100 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
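/*
 * Editor's note - the "snapshot" command registered above is driven through
 * set_ftrace_filter; a sketch (the function name is only an example):
 *
 *	# echo 'schedule:snapshot' > set_ftrace_filter      (snapshot on every hit)
 *	# echo 'schedule:snapshot:3' >> set_ftrace_filter   (only the first 3 hits)
 *	# echo '!schedule:snapshot' >> set_ftrace_filter    (remove the probe)
 */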
8101
8102 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8103 {
8104 if (WARN_ON(!tr->dir))
8105 return ERR_PTR(-ENODEV);
8106
8107 /* Top directory uses NULL as the parent */
8108 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8109 return NULL;
8110
8111 /* All sub buffers have a descriptor */
8112 return tr->dir;
8113 }
8114
8115 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8116 {
8117 struct dentry *d_tracer;
8118
8119 if (tr->percpu_dir)
8120 return tr->percpu_dir;
8121
8122 d_tracer = tracing_get_dentry(tr);
8123 if (IS_ERR(d_tracer))
8124 return NULL;
8125
8126 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8127
8128 MEM_FAIL(!tr->percpu_dir,
8129 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8130
8131 return tr->percpu_dir;
8132 }
8133
8134 static struct dentry *
8135 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8136 void *data, long cpu, const struct file_operations *fops)
8137 {
8138 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8139
8140 if (ret) /* See tracing_get_cpu() */
8141 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8142 return ret;
8143 }
8144
8145 static void
8146 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8147 {
8148 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8149 struct dentry *d_cpu;
8150 char cpu_dir[30]; /* 30 characters should be more than enough */
8151
8152 if (!d_percpu)
8153 return;
8154
8155 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8156 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8157 if (!d_cpu) {
8158 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8159 return;
8160 }
8161
8162 /* per cpu trace_pipe */
8163 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8164 tr, cpu, &tracing_pipe_fops);
8165
8166 /* per cpu trace */
8167 trace_create_cpu_file("trace", 0644, d_cpu,
8168 tr, cpu, &tracing_fops);
8169
8170 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8171 tr, cpu, &tracing_buffers_fops);
8172
8173 trace_create_cpu_file("stats", 0444, d_cpu,
8174 tr, cpu, &tracing_stats_fops);
8175
8176 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8177 tr, cpu, &tracing_entries_fops);
8178
8179 #ifdef CONFIG_TRACER_SNAPSHOT
8180 trace_create_cpu_file("snapshot", 0644, d_cpu,
8181 tr, cpu, &snapshot_fops);
8182
8183 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8184 tr, cpu, &snapshot_raw_fops);
8185 #endif
8186 }
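/*
 * Editor's note - the files created above land under per_cpu/cpu<N>/ in the
 * instance's tracefs directory, e.g.:
 *
 *	# cat /sys/kernel/tracing/per_cpu/cpu0/trace_pipe
 *	# cat /sys/kernel/tracing/per_cpu/cpu0/stats
 */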
8187
8188 #ifdef CONFIG_FTRACE_SELFTEST
8189 /* Let selftest have access to static functions in this file */
8190 #include "trace_selftest.c"
8191 #endif
8192
8193 static ssize_t
8194 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8195 loff_t *ppos)
8196 {
8197 struct trace_option_dentry *topt = filp->private_data;
8198 char *buf;
8199
8200 if (topt->flags->val & topt->opt->bit)
8201 buf = "1\n";
8202 else
8203 buf = "0\n";
8204
8205 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8206 }
8207
8208 static ssize_t
8209 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8210 loff_t *ppos)
8211 {
8212 struct trace_option_dentry *topt = filp->private_data;
8213 unsigned long val;
8214 int ret;
8215
8216 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8217 if (ret)
8218 return ret;
8219
8220 if (val != 0 && val != 1)
8221 return -EINVAL;
8222
8223 if (!!(topt->flags->val & topt->opt->bit) != val) {
8224 mutex_lock(&trace_types_lock);
8225 ret = __set_tracer_option(topt->tr, topt->flags,
8226 topt->opt, !val);
8227 mutex_unlock(&trace_types_lock);
8228 if (ret)
8229 return ret;
8230 }
8231
8232 *ppos += cnt;
8233
8234 return cnt;
8235 }
8236
8237
8238 static const struct file_operations trace_options_fops = {
8239 .open = tracing_open_generic,
8240 .read = trace_options_read,
8241 .write = trace_options_write,
8242 .llseek = generic_file_llseek,
8243 };
8244
8245 /*
8246 * In order to pass in both the trace_array descriptor as well as the index
8247 * to the flag that the trace option file represents, the trace_array
8248 * has a character array of trace_flags_index[], which holds the index
8249 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8250 * The address of this character array is passed to the flag option file
8251 * read/write callbacks.
8252 *
8253 * In order to extract both the index and the trace_array descriptor,
8254 * get_tr_index() uses the following algorithm.
8255 *
8256 * idx = *ptr;
8257 *
8258 * As the pointer itself contains the address of the index (remember
8259 * index[1] == 1).
8260 *
8261 * Then to get the trace_array descriptor, by subtracting that index
8262 * from the ptr, we get to the start of the index itself.
8263 *
8264 * ptr - idx == &index[0]
8265 *
8266 * Then a simple container_of() from that pointer gets us to the
8267 * trace_array descriptor.
8268 */
8269 static void get_tr_index(void *data, struct trace_array **ptr,
8270 unsigned int *pindex)
8271 {
8272 *pindex = *(unsigned char *)data;
8273
8274 *ptr = container_of(data - *pindex, struct trace_array,
8275 trace_flags_index);
8276 }
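/*
 * Editor's worked example of the scheme described above: if @data points at
 * tr->trace_flags_index[3], then *data == 3, so (data - 3) is
 * &tr->trace_flags_index[0], and container_of() on that address yields tr.
 */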
8277
8278 static ssize_t
8279 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8280 loff_t *ppos)
8281 {
8282 void *tr_index = filp->private_data;
8283 struct trace_array *tr;
8284 unsigned int index;
8285 char *buf;
8286
8287 get_tr_index(tr_index, &tr, &index);
8288
8289 if (tr->trace_flags & (1 << index))
8290 buf = "1\n";
8291 else
8292 buf = "0\n";
8293
8294 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8295 }
8296
8297 static ssize_t
8298 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8299 loff_t *ppos)
8300 {
8301 void *tr_index = filp->private_data;
8302 struct trace_array *tr;
8303 unsigned int index;
8304 unsigned long val;
8305 int ret;
8306
8307 get_tr_index(tr_index, &tr, &index);
8308
8309 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8310 if (ret)
8311 return ret;
8312
8313 if (val != 0 && val != 1)
8314 return -EINVAL;
8315
8316 mutex_lock(&event_mutex);
8317 mutex_lock(&trace_types_lock);
8318 ret = set_tracer_flag(tr, 1 << index, val);
8319 mutex_unlock(&trace_types_lock);
8320 mutex_unlock(&event_mutex);
8321
8322 if (ret < 0)
8323 return ret;
8324
8325 *ppos += cnt;
8326
8327 return cnt;
8328 }
8329
8330 static const struct file_operations trace_options_core_fops = {
8331 .open = tracing_open_generic,
8332 .read = trace_options_core_read,
8333 .write = trace_options_core_write,
8334 .llseek = generic_file_llseek,
8335 };
8336
8337 struct dentry *trace_create_file(const char *name,
8338 umode_t mode,
8339 struct dentry *parent,
8340 void *data,
8341 const struct file_operations *fops)
8342 {
8343 struct dentry *ret;
8344
8345 ret = tracefs_create_file(name, mode, parent, data, fops);
8346 if (!ret)
8347 pr_warn("Could not create tracefs '%s' entry\n", name);
8348
8349 return ret;
8350 }
8351
8352
8353 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8354 {
8355 struct dentry *d_tracer;
8356
8357 if (tr->options)
8358 return tr->options;
8359
8360 d_tracer = tracing_get_dentry(tr);
8361 if (IS_ERR(d_tracer))
8362 return NULL;
8363
8364 tr->options = tracefs_create_dir("options", d_tracer);
8365 if (!tr->options) {
8366 pr_warn("Could not create tracefs directory 'options'\n");
8367 return NULL;
8368 }
8369
8370 return tr->options;
8371 }
8372
8373 static void
8374 create_trace_option_file(struct trace_array *tr,
8375 struct trace_option_dentry *topt,
8376 struct tracer_flags *flags,
8377 struct tracer_opt *opt)
8378 {
8379 struct dentry *t_options;
8380
8381 t_options = trace_options_init_dentry(tr);
8382 if (!t_options)
8383 return;
8384
8385 topt->flags = flags;
8386 topt->opt = opt;
8387 topt->tr = tr;
8388
8389 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8390 &trace_options_fops);
8391
8392 }
8393
8394 static void
8395 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8396 {
8397 struct trace_option_dentry *topts;
8398 struct trace_options *tr_topts;
8399 struct tracer_flags *flags;
8400 struct tracer_opt *opts;
8401 int cnt;
8402 int i;
8403
8404 if (!tracer)
8405 return;
8406
8407 flags = tracer->flags;
8408
8409 if (!flags || !flags->opts)
8410 return;
8411
8412 /*
8413 * If this is an instance, only create flags for tracers
8414 * the instance may have.
8415 */
8416 if (!trace_ok_for_array(tracer, tr))
8417 return;
8418
8419 for (i = 0; i < tr->nr_topts; i++) {
8420 /* Make sure there's no duplicate flags. */
8421 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8422 return;
8423 }
8424
8425 opts = flags->opts;
8426
8427 for (cnt = 0; opts[cnt].name; cnt++)
8428 ;
8429
8430 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8431 if (!topts)
8432 return;
8433
8434 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8435 GFP_KERNEL);
8436 if (!tr_topts) {
8437 kfree(topts);
8438 return;
8439 }
8440
8441 tr->topts = tr_topts;
8442 tr->topts[tr->nr_topts].tracer = tracer;
8443 tr->topts[tr->nr_topts].topts = topts;
8444 tr->nr_topts++;
8445
8446 for (cnt = 0; opts[cnt].name; cnt++) {
8447 create_trace_option_file(tr, &topts[cnt], flags,
8448 &opts[cnt]);
8449 MEM_FAIL(topts[cnt].entry == NULL,
8450 "Failed to create trace option: %s",
8451 opts[cnt].name);
8452 }
8453 }
8454
8455 static struct dentry *
8456 create_trace_option_core_file(struct trace_array *tr,
8457 const char *option, long index)
8458 {
8459 struct dentry *t_options;
8460
8461 t_options = trace_options_init_dentry(tr);
8462 if (!t_options)
8463 return NULL;
8464
8465 return trace_create_file(option, 0644, t_options,
8466 (void *)&tr->trace_flags_index[index],
8467 &trace_options_core_fops);
8468 }
8469
8470 static void create_trace_options_dir(struct trace_array *tr)
8471 {
8472 struct dentry *t_options;
8473 bool top_level = tr == &global_trace;
8474 int i;
8475
8476 t_options = trace_options_init_dentry(tr);
8477 if (!t_options)
8478 return;
8479
8480 for (i = 0; trace_options[i]; i++) {
8481 if (top_level ||
8482 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8483 create_trace_option_core_file(tr, trace_options[i], i);
8484 }
8485 }
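/*
 * Editor's note - the files created above sit in the options/ directory and
 * take "0" or "1"; a sketch (the option name is just an example):
 *
 *	# echo 1 > /sys/kernel/tracing/options/sym-offset
 *	# cat /sys/kernel/tracing/options/sym-offset
 *	1
 */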
8486
8487 static ssize_t
8488 rb_simple_read(struct file *filp, char __user *ubuf,
8489 size_t cnt, loff_t *ppos)
8490 {
8491 struct trace_array *tr = filp->private_data;
8492 char buf[64];
8493 int r;
8494
8495 r = tracer_tracing_is_on(tr);
8496 r = sprintf(buf, "%d\n", r);
8497
8498 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8499 }
8500
8501 static ssize_t
8502 rb_simple_write(struct file *filp, const char __user *ubuf,
8503 size_t cnt, loff_t *ppos)
8504 {
8505 struct trace_array *tr = filp->private_data;
8506 struct trace_buffer *buffer = tr->array_buffer.buffer;
8507 unsigned long val;
8508 int ret;
8509
8510 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8511 if (ret)
8512 return ret;
8513
8514 if (buffer) {
8515 mutex_lock(&trace_types_lock);
8516 if (!!val == tracer_tracing_is_on(tr)) {
8517 val = 0; /* do nothing */
8518 } else if (val) {
8519 tracer_tracing_on(tr);
8520 if (tr->current_trace->start)
8521 tr->current_trace->start(tr);
8522 } else {
8523 tracer_tracing_off(tr);
8524 if (tr->current_trace->stop)
8525 tr->current_trace->stop(tr);
8526 }
8527 mutex_unlock(&trace_types_lock);
8528 }
8529
8530 (*ppos)++;
8531
8532 return cnt;
8533 }
8534
8535 static const struct file_operations rb_simple_fops = {
8536 .open = tracing_open_generic_tr,
8537 .read = rb_simple_read,
8538 .write = rb_simple_write,
8539 .release = tracing_release_generic_tr,
8540 .llseek = default_llseek,
8541 };
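/*
 * Editor's note - rb_simple_fops backs the "tracing_on" file; a minimal
 * sketch of its use from user space:
 *
 *	# echo 0 > /sys/kernel/tracing/tracing_on	(stop writing to the ring buffer)
 *	# echo 1 > /sys/kernel/tracing/tracing_on	(resume)
 *	# cat /sys/kernel/tracing/tracing_on
 *	1
 */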
8542
8543 static ssize_t
8544 buffer_percent_read(struct file *filp, char __user *ubuf,
8545 size_t cnt, loff_t *ppos)
8546 {
8547 struct trace_array *tr = filp->private_data;
8548 char buf[64];
8549 int r;
8550
8551 r = tr->buffer_percent;
8552 r = sprintf(buf, "%d\n", r);
8553
8554 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8555 }
8556
8557 static ssize_t
8558 buffer_percent_write(struct file *filp, const char __user *ubuf,
8559 size_t cnt, loff_t *ppos)
8560 {
8561 struct trace_array *tr = filp->private_data;
8562 unsigned long val;
8563 int ret;
8564
8565 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8566 if (ret)
8567 return ret;
8568
8569 if (val > 100)
8570 return -EINVAL;
8571
8572 if (!val)
8573 val = 1;
8574
8575 tr->buffer_percent = val;
8576
8577 (*ppos)++;
8578
8579 return cnt;
8580 }
8581
8582 static const struct file_operations buffer_percent_fops = {
8583 .open = tracing_open_generic_tr,
8584 .read = buffer_percent_read,
8585 .write = buffer_percent_write,
8586 .release = tracing_release_generic_tr,
8587 .llseek = default_llseek,
8588 };
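/*
 * Editor's note - buffer_percent_fops backs the "buffer_percent" file, which
 * tracing_buffers_splice_read() above uses as its wake-up watermark: blocked
 * readers are woken once the buffer is at least this full. Sketch:
 *
 *	# echo 50 > /sys/kernel/tracing/buffer_percent
 */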
8589
8590 static struct dentry *trace_instance_dir;
8591
8592 static void
8593 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8594
8595 static int
8596 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8597 {
8598 enum ring_buffer_flags rb_flags;
8599
8600 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8601
8602 buf->tr = tr;
8603
8604 buf->buffer = ring_buffer_alloc(size, rb_flags);
8605 if (!buf->buffer)
8606 return -ENOMEM;
8607
8608 buf->data = alloc_percpu(struct trace_array_cpu);
8609 if (!buf->data) {
8610 ring_buffer_free(buf->buffer);
8611 buf->buffer = NULL;
8612 return -ENOMEM;
8613 }
8614
8615 /* Allocate the first page for all buffers */
8616 set_buffer_entries(&tr->array_buffer,
8617 ring_buffer_size(tr->array_buffer.buffer, 0));
8618
8619 return 0;
8620 }
8621
8622 static int allocate_trace_buffers(struct trace_array *tr, int size)
8623 {
8624 int ret;
8625
8626 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8627 if (ret)
8628 return ret;
8629
8630 #ifdef CONFIG_TRACER_MAX_TRACE
8631 ret = allocate_trace_buffer(tr, &tr->max_buffer,
8632 allocate_snapshot ? size : 1);
8633 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8634 ring_buffer_free(tr->array_buffer.buffer);
8635 tr->array_buffer.buffer = NULL;
8636 free_percpu(tr->array_buffer.data);
8637 tr->array_buffer.data = NULL;
8638 return -ENOMEM;
8639 }
8640 tr->allocated_snapshot = allocate_snapshot;
8641
8642 /*
8643 * Only the top level trace array gets its snapshot allocated
8644 * from the kernel command line.
8645 */
8646 allocate_snapshot = false;
8647 #endif
8648
8649 return 0;
8650 }
8651
8652 static void free_trace_buffer(struct array_buffer *buf)
8653 {
8654 if (buf->buffer) {
8655 ring_buffer_free(buf->buffer);
8656 buf->buffer = NULL;
8657 free_percpu(buf->data);
8658 buf->data = NULL;
8659 }
8660 }
8661
8662 static void free_trace_buffers(struct trace_array *tr)
8663 {
8664 if (!tr)
8665 return;
8666
8667 free_trace_buffer(&tr->array_buffer);
8668
8669 #ifdef CONFIG_TRACER_MAX_TRACE
8670 free_trace_buffer(&tr->max_buffer);
8671 #endif
8672 }
8673
8674 static void init_trace_flags_index(struct trace_array *tr)
8675 {
8676 int i;
8677
8678 /* Used by the trace options files */
8679 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8680 tr->trace_flags_index[i] = i;
8681 }
8682
8683 static void __update_tracer_options(struct trace_array *tr)
8684 {
8685 struct tracer *t;
8686
8687 for (t = trace_types; t; t = t->next)
8688 add_tracer_options(tr, t);
8689 }
8690
8691 static void update_tracer_options(struct trace_array *tr)
8692 {
8693 mutex_lock(&trace_types_lock);
8694 tracer_options_updated = true;
8695 __update_tracer_options(tr);
8696 mutex_unlock(&trace_types_lock);
8697 }
8698
8699 /* Must have trace_types_lock held */
8700 struct trace_array *trace_array_find(const char *instance)
8701 {
8702 struct trace_array *tr, *found = NULL;
8703
8704 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8705 if (tr->name && strcmp(tr->name, instance) == 0) {
8706 found = tr;
8707 break;
8708 }
8709 }
8710
8711 return found;
8712 }
8713
8714 struct trace_array *trace_array_find_get(const char *instance)
8715 {
8716 struct trace_array *tr;
8717
8718 mutex_lock(&trace_types_lock);
8719 tr = trace_array_find(instance);
8720 if (tr)
8721 tr->ref++;
8722 mutex_unlock(&trace_types_lock);
8723
8724 return tr;
8725 }
8726
8727 static int trace_array_create_dir(struct trace_array *tr)
8728 {
8729 int ret;
8730
8731 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8732 if (!tr->dir)
8733 return -EINVAL;
8734
8735 ret = event_trace_add_tracer(tr->dir, tr);
8736 if (ret) {
8737 tracefs_remove(tr->dir);
8738 return ret;
8739 }
8740
8741 init_tracer_tracefs(tr, tr->dir);
8742 __update_tracer_options(tr);
8743
8744 return ret;
8745 }
8746
8747 static struct trace_array *trace_array_create(const char *name)
8748 {
8749 struct trace_array *tr;
8750 int ret;
8751
8752 ret = -ENOMEM;
8753 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8754 if (!tr)
8755 return ERR_PTR(ret);
8756
8757 tr->name = kstrdup(name, GFP_KERNEL);
8758 if (!tr->name)
8759 goto out_free_tr;
8760
8761 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8762 goto out_free_tr;
8763
8764 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8765
8766 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8767
8768 raw_spin_lock_init(&tr->start_lock);
8769
8770 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8771
8772 tr->current_trace = &nop_trace;
8773
8774 INIT_LIST_HEAD(&tr->systems);
8775 INIT_LIST_HEAD(&tr->events);
8776 INIT_LIST_HEAD(&tr->hist_vars);
8777 INIT_LIST_HEAD(&tr->err_log);
8778
8779 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8780 goto out_free_tr;
8781
8782 if (ftrace_allocate_ftrace_ops(tr) < 0)
8783 goto out_free_tr;
8784
8785 ftrace_init_trace_array(tr);
8786
8787 init_trace_flags_index(tr);
8788
8789 if (trace_instance_dir) {
8790 ret = trace_array_create_dir(tr);
8791 if (ret)
8792 goto out_free_tr;
8793 } else
8794 __trace_early_add_events(tr);
8795
8796 list_add(&tr->list, &ftrace_trace_arrays);
8797
8798 tr->ref++;
8799
8800 return tr;
8801
8802 out_free_tr:
8803 ftrace_free_ftrace_ops(tr);
8804 free_trace_buffers(tr);
8805 free_cpumask_var(tr->tracing_cpumask);
8806 kfree(tr->name);
8807 kfree(tr);
8808
8809 return ERR_PTR(ret);
8810 }
8811
8812 static int instance_mkdir(const char *name)
8813 {
8814 struct trace_array *tr;
8815 int ret;
8816
8817 mutex_lock(&event_mutex);
8818 mutex_lock(&trace_types_lock);
8819
8820 ret = -EEXIST;
8821 if (trace_array_find(name))
8822 goto out_unlock;
8823
8824 tr = trace_array_create(name);
8825
8826 ret = PTR_ERR_OR_ZERO(tr);
8827
8828 out_unlock:
8829 mutex_unlock(&trace_types_lock);
8830 mutex_unlock(&event_mutex);
8831 return ret;
8832 }
8833
8834 /**
8835 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8836 * @name: The name of the trace array to be looked up/created.
8837 *
8838 * Returns pointer to trace array with given name.
8839 * NULL, if it cannot be created.
8840 *
8841 * NOTE: This function increments the reference counter associated with the
8842 * trace array returned. This makes sure it cannot be freed while in use.
8843 * Use trace_array_put() once the trace array is no longer needed.
8844 * If the trace_array is to be freed, trace_array_destroy() needs to
8845 * be called after the trace_array_put(), or simply let user space delete
8846 * it from the tracefs instances directory. But until the
8847 * trace_array_put() is called, user space can not delete it.
8848 *
8849 */
8850 struct trace_array *trace_array_get_by_name(const char *name)
8851 {
8852 struct trace_array *tr;
8853
8854 mutex_lock(&event_mutex);
8855 mutex_lock(&trace_types_lock);
8856
8857 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8858 if (tr->name && strcmp(tr->name, name) == 0)
8859 goto out_unlock;
8860 }
8861
8862 tr = trace_array_create(name);
8863
8864 if (IS_ERR(tr))
8865 tr = NULL;
8866 out_unlock:
8867 if (tr)
8868 tr->ref++;
8869
8870 mutex_unlock(&trace_types_lock);
8871 mutex_unlock(&event_mutex);
8872 return tr;
8873 }
8874 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
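/*
 * Editor's sketch of how an in-kernel user pairs these exported calls;
 * "my-instance" and the surrounding module code are hypothetical:
 *
 *	struct trace_array *my_tr;
 *
 *	my_tr = trace_array_get_by_name("my-instance");	// create or look up
 *	if (!my_tr)
 *		return -ENOMEM;
 *	...
 *	trace_array_put(my_tr);		// drop the reference taken above
 *	trace_array_destroy(my_tr);	// then (optionally) remove the instance
 */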
8875
8876 static int __remove_instance(struct trace_array *tr)
8877 {
8878 int i;
8879
8880 /* Reference counter for a newly created trace array = 1. */
8881 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8882 return -EBUSY;
8883
8884 list_del(&tr->list);
8885
8886 /* Disable all the flags that were enabled coming in */
8887 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8888 if ((1 << i) & ZEROED_TRACE_FLAGS)
8889 set_tracer_flag(tr, 1 << i, 0);
8890 }
8891
8892 tracing_set_nop(tr);
8893 clear_ftrace_function_probes(tr);
8894 event_trace_del_tracer(tr);
8895 ftrace_clear_pids(tr);
8896 ftrace_destroy_function_files(tr);
8897 tracefs_remove(tr->dir);
8898 free_trace_buffers(tr);
8899
8900 for (i = 0; i < tr->nr_topts; i++) {
8901 kfree(tr->topts[i].topts);
8902 }
8903 kfree(tr->topts);
8904
8905 free_cpumask_var(tr->tracing_cpumask);
8906 kfree(tr->name);
8907 kfree(tr);
8908
8909 return 0;
8910 }
8911
8912 int trace_array_destroy(struct trace_array *this_tr)
8913 {
8914 struct trace_array *tr;
8915 int ret;
8916
8917 if (!this_tr)
8918 return -EINVAL;
8919
8920 mutex_lock(&event_mutex);
8921 mutex_lock(&trace_types_lock);
8922
8923 ret = -ENODEV;
8924
8925 /* Making sure trace array exists before destroying it. */
8926 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8927 if (tr == this_tr) {
8928 ret = __remove_instance(tr);
8929 break;
8930 }
8931 }
8932
8933 mutex_unlock(&trace_types_lock);
8934 mutex_unlock(&event_mutex);
8935
8936 return ret;
8937 }
8938 EXPORT_SYMBOL_GPL(trace_array_destroy);
8939
8940 static int instance_rmdir(const char *name)
8941 {
8942 struct trace_array *tr;
8943 int ret;
8944
8945 mutex_lock(&event_mutex);
8946 mutex_lock(&trace_types_lock);
8947
8948 ret = -ENODEV;
8949 tr = trace_array_find(name);
8950 if (tr)
8951 ret = __remove_instance(tr);
8952
8953 mutex_unlock(&trace_types_lock);
8954 mutex_unlock(&event_mutex);
8955
8956 return ret;
8957 }
8958
8959 static __init void create_trace_instances(struct dentry *d_tracer)
8960 {
8961 struct trace_array *tr;
8962
8963 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8964 instance_mkdir,
8965 instance_rmdir);
8966 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8967 return;
8968
8969 mutex_lock(&event_mutex);
8970 mutex_lock(&trace_types_lock);
8971
8972 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8973 if (!tr->name)
8974 continue;
8975 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
8976 "Failed to create instance directory\n"))
8977 break;
8978 }
8979
8980 mutex_unlock(&trace_types_lock);
8981 mutex_unlock(&event_mutex);
8982 }
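/*
 * Editor's note - the instances directory wired up above is driven from user
 * space with plain mkdir/rmdir (the instance name is just an example):
 *
 *	# mkdir /sys/kernel/tracing/instances/foo
 *	# echo function > /sys/kernel/tracing/instances/foo/current_tracer
 *	# rmdir /sys/kernel/tracing/instances/foo
 */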
8983
8984 static void
8985 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8986 {
8987 struct trace_event_file *file;
8988 int cpu;
8989
8990 trace_create_file("available_tracers", 0444, d_tracer,
8991 tr, &show_traces_fops);
8992
8993 trace_create_file("current_tracer", 0644, d_tracer,
8994 tr, &set_tracer_fops);
8995
8996 trace_create_file("tracing_cpumask", 0644, d_tracer,
8997 tr, &tracing_cpumask_fops);
8998
8999 trace_create_file("trace_options", 0644, d_tracer,
9000 tr, &tracing_iter_fops);
9001
9002 trace_create_file("trace", 0644, d_tracer,
9003 tr, &tracing_fops);
9004
9005 trace_create_file("trace_pipe", 0444, d_tracer,
9006 tr, &tracing_pipe_fops);
9007
9008 trace_create_file("buffer_size_kb", 0644, d_tracer,
9009 tr, &tracing_entries_fops);
9010
9011 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9012 tr, &tracing_total_entries_fops);
9013
9014 trace_create_file("free_buffer", 0200, d_tracer,
9015 tr, &tracing_free_buffer_fops);
9016
9017 trace_create_file("trace_marker", 0220, d_tracer,
9018 tr, &tracing_mark_fops);
9019
9020 file = __find_event_file(tr, "ftrace", "print");
9021 if (file && file->dir)
9022 trace_create_file("trigger", 0644, file->dir, file,
9023 &event_trigger_fops);
9024 tr->trace_marker_file = file;
9025
9026 trace_create_file("trace_marker_raw", 0220, d_tracer,
9027 tr, &tracing_mark_raw_fops);
9028
9029 trace_create_file("trace_clock", 0644, d_tracer, tr,
9030 &trace_clock_fops);
9031
9032 trace_create_file("tracing_on", 0644, d_tracer,
9033 tr, &rb_simple_fops);
9034
9035 trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9036 &trace_time_stamp_mode_fops);
9037
9038 tr->buffer_percent = 50;
9039
9040 trace_create_file("buffer_percent", 0444, d_tracer,
9041 tr, &buffer_percent_fops);
9042
9043 create_trace_options_dir(tr);
9044
9045 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9046 trace_create_maxlat_file(tr, d_tracer);
9047 #endif
9048
9049 if (ftrace_create_function_files(tr, d_tracer))
9050 MEM_FAIL(1, "Could not allocate function filter files");
9051
9052 #ifdef CONFIG_TRACER_SNAPSHOT
9053 trace_create_file("snapshot", 0644, d_tracer,
9054 tr, &snapshot_fops);
9055 #endif
9056
9057 trace_create_file("error_log", 0644, d_tracer,
9058 tr, &tracing_err_log_fops);
9059
9060 for_each_tracing_cpu(cpu)
9061 tracing_init_tracefs_percpu(tr, cpu);
9062
9063 ftrace_init_tracefs(tr, d_tracer);
9064 }
9065
9066 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9067 {
9068 struct vfsmount *mnt;
9069 struct file_system_type *type;
9070
9071 /*
9072 * To maintain backward compatibility for tools that mount
9073 * debugfs to get to the tracing facility, tracefs is automatically
9074 * mounted to the debugfs/tracing directory.
9075 */
9076 type = get_fs_type("tracefs");
9077 if (!type)
9078 return NULL;
9079 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9080 put_filesystem(type);
9081 if (IS_ERR(mnt))
9082 return NULL;
9083 mntget(mnt);
9084
9085 return mnt;
9086 }
9087
9088 /**
9089 * tracing_init_dentry - initialize top level trace array
9090 *
9091 * This is called when creating files or directories in the tracing
9092 * directory. It is called via fs_initcall() by any of the boot up code
9093 * and expects to return the dentry of the top level tracing directory.
9094 */
9095 int tracing_init_dentry(void)
9096 {
9097 struct trace_array *tr = &global_trace;
9098
9099 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9100 pr_warn("Tracing disabled due to lockdown\n");
9101 return -EPERM;
9102 }
9103
9104 /* The top level trace array uses NULL as parent */
9105 if (tr->dir)
9106 return 0;
9107
9108 if (WARN_ON(!tracefs_initialized()))
9109 return -ENODEV;
9110
9111 /*
9112 * As there may still be users that expect the tracing
9113 * files to exist in debugfs/tracing, we must automount
9114 * the tracefs file system there, so older tools still
9115 * work with the newer kernel.
9116 */
9117 tr->dir = debugfs_create_automount("tracing", NULL,
9118 trace_automount, NULL);
9119
9120 return 0;
9121 }
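/*
 * Editor's note - with the automount above in place, older tools keep working
 * (assuming debugfs is enabled and mounted in the usual place):
 *
 *	/sys/kernel/tracing/		native tracefs mount
 *	/sys/kernel/debug/tracing/	automounted tracefs, same files
 */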
9122
9123 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9124 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9125
9126 static void __init trace_eval_init(void)
9127 {
9128 int len;
9129
9130 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9131 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9132 }
9133
9134 #ifdef CONFIG_MODULES
9135 static void trace_module_add_evals(struct module *mod)
9136 {
9137 if (!mod->num_trace_evals)
9138 return;
9139
9140 /*
9141 * Modules with bad taint do not have events created, do
9142 * not bother with enums either.
9143 */
9144 if (trace_module_has_bad_taint(mod))
9145 return;
9146
9147 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9148 }
9149
9150 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9151 static void trace_module_remove_evals(struct module *mod)
9152 {
9153 union trace_eval_map_item *map;
9154 union trace_eval_map_item **last = &trace_eval_maps;
9155
9156 if (!mod->num_trace_evals)
9157 return;
9158
9159 mutex_lock(&trace_eval_mutex);
9160
9161 map = trace_eval_maps;
9162
9163 while (map) {
9164 if (map->head.mod == mod)
9165 break;
9166 map = trace_eval_jmp_to_tail(map);
9167 last = &map->tail.next;
9168 map = map->tail.next;
9169 }
9170 if (!map)
9171 goto out;
9172
9173 *last = trace_eval_jmp_to_tail(map)->tail.next;
9174 kfree(map);
9175 out:
9176 mutex_unlock(&trace_eval_mutex);
9177 }
9178 #else
9179 static inline void trace_module_remove_evals(struct module *mod) { }
9180 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9181
9182 static int trace_module_notify(struct notifier_block *self,
9183 unsigned long val, void *data)
9184 {
9185 struct module *mod = data;
9186
9187 switch (val) {
9188 case MODULE_STATE_COMING:
9189 trace_module_add_evals(mod);
9190 break;
9191 case MODULE_STATE_GOING:
9192 trace_module_remove_evals(mod);
9193 break;
9194 }
9195
9196 return NOTIFY_OK;
9197 }
9198
9199 static struct notifier_block trace_module_nb = {
9200 .notifier_call = trace_module_notify,
9201 .priority = 0,
9202 };
9203 #endif /* CONFIG_MODULES */
9204
9205 static __init int tracer_init_tracefs(void)
9206 {
9207 int ret;
9208
9209 trace_access_lock_init();
9210
9211 ret = tracing_init_dentry();
9212 if (ret)
9213 return 0;
9214
9215 event_trace_init();
9216
9217 init_tracer_tracefs(&global_trace, NULL);
9218 ftrace_init_tracefs_toplevel(&global_trace, NULL);
9219
9220 trace_create_file("tracing_thresh", 0644, NULL,
9221 &global_trace, &tracing_thresh_fops);
9222
9223 trace_create_file("README", 0444, NULL,
9224 NULL, &tracing_readme_fops);
9225
9226 trace_create_file("saved_cmdlines", 0444, NULL,
9227 NULL, &tracing_saved_cmdlines_fops);
9228
9229 trace_create_file("saved_cmdlines_size", 0644, NULL,
9230 NULL, &tracing_saved_cmdlines_size_fops);
9231
9232 trace_create_file("saved_tgids", 0444, NULL,
9233 NULL, &tracing_saved_tgids_fops);
9234
9235 trace_eval_init();
9236
9237 trace_create_eval_file(NULL);
9238
9239 #ifdef CONFIG_MODULES
9240 register_module_notifier(&trace_module_nb);
9241 #endif
9242
9243 #ifdef CONFIG_DYNAMIC_FTRACE
9244 trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9245 NULL, &tracing_dyn_info_fops);
9246 #endif
9247
9248 create_trace_instances(NULL);
9249
9250 update_tracer_options(&global_trace);
9251
9252 return 0;
9253 }
9254
9255 static int trace_panic_handler(struct notifier_block *this,
9256 unsigned long event, void *unused)
9257 {
9258 if (ftrace_dump_on_oops)
9259 ftrace_dump(ftrace_dump_on_oops);
9260 return NOTIFY_OK;
9261 }
9262
9263 static struct notifier_block trace_panic_notifier = {
9264 .notifier_call = trace_panic_handler,
9265 .next = NULL,
9266 .priority = 150 /* priority: INT_MAX >= x >= 0 */
9267 };
9268
9269 static int trace_die_handler(struct notifier_block *self,
9270 unsigned long val,
9271 void *data)
9272 {
9273 switch (val) {
9274 case DIE_OOPS:
9275 if (ftrace_dump_on_oops)
9276 ftrace_dump(ftrace_dump_on_oops);
9277 break;
9278 default:
9279 break;
9280 }
9281 return NOTIFY_OK;
9282 }
9283
9284 static struct notifier_block trace_die_notifier = {
9285 .notifier_call = trace_die_handler,
9286 .priority = 200
9287 };
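/*
 * Editor's note - the panic/die notifiers above only dump when
 * ftrace_dump_on_oops is set; typical ways to enable it (a sketch):
 *
 *	ftrace_dump_on_oops				(kernel command line)
 *	# echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */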
9288
9289 /*
9290 * printk is set to max of 1024, we really don't need it that big.
9291 * Nothing should be printing 1000 characters anyway.
9292 */
9293 #define TRACE_MAX_PRINT 1000
9294
9295 /*
9296 * Define here KERN_TRACE so that we have one place to modify
9297 * it if we decide to change what log level the ftrace dump
9298 * should be at.
9299 */
9300 #define KERN_TRACE KERN_EMERG
9301
9302 void
9303 trace_printk_seq(struct trace_seq *s)
9304 {
9305 /* Probably should print a warning here. */
9306 if (s->seq.len >= TRACE_MAX_PRINT)
9307 s->seq.len = TRACE_MAX_PRINT;
9308
9309 /*
9310 * More paranoid code. Although the buffer size is set to
9311 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9312 * an extra layer of protection.
9313 */
9314 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9315 s->seq.len = s->seq.size - 1;
9316
9317 /* should be NUL terminated, but we are paranoid. */
9318 s->buffer[s->seq.len] = 0;
9319
9320 printk(KERN_TRACE "%s", s->buffer);
9321
9322 trace_seq_init(s);
9323 }
9324
9325 void trace_init_global_iter(struct trace_iterator *iter)
9326 {
9327 iter->tr = &global_trace;
9328 iter->trace = iter->tr->current_trace;
9329 iter->cpu_file = RING_BUFFER_ALL_CPUS;
9330 iter->array_buffer = &global_trace.array_buffer;
9331
9332 if (iter->trace && iter->trace->open)
9333 iter->trace->open(iter);
9334
9335 /* Annotate start of buffers if we had overruns */
9336 if (ring_buffer_overruns(iter->array_buffer->buffer))
9337 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9338
9339 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9340 if (trace_clocks[iter->tr->clock_id].in_ns)
9341 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9342 }
9343
9344 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9345 {
9346 /* use static because iter can be a bit big for the stack */
9347 static struct trace_iterator iter;
9348 static atomic_t dump_running;
9349 struct trace_array *tr = &global_trace;
9350 unsigned int old_userobj;
9351 unsigned long flags;
9352 int cnt = 0, cpu;
9353
9354 /* Only allow one dump user at a time. */
9355 if (atomic_inc_return(&dump_running) != 1) {
9356 atomic_dec(&dump_running);
9357 return;
9358 }
9359
9360 /*
9361 * Always turn off tracing when we dump.
9362 * We don't need to show trace output of what happens
9363 * between multiple crashes.
9364 *
9365 * If the user does a sysrq-z, then they can re-enable
9366 * tracing with echo 1 > tracing_on.
9367 */
9368 tracing_off();
9369
9370 local_irq_save(flags);
9371 printk_nmi_direct_enter();
9372
9373 /* Simulate the iterator */
9374 trace_init_global_iter(&iter);
9375 /* Can not use kmalloc for iter.temp */
9376 iter.temp = static_temp_buf;
9377 iter.temp_size = STATIC_TEMP_BUF_SIZE;
9378
9379 for_each_tracing_cpu(cpu) {
9380 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9381 }
9382
9383 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9384
9385 /* don't look at user memory in panic mode */
9386 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9387
9388 switch (oops_dump_mode) {
9389 case DUMP_ALL:
9390 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9391 break;
9392 case DUMP_ORIG:
9393 iter.cpu_file = raw_smp_processor_id();
9394 break;
9395 case DUMP_NONE:
9396 goto out_enable;
9397 default:
9398 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9399 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9400 }
9401
9402 printk(KERN_TRACE "Dumping ftrace buffer:\n");
9403
9404 /* Did function tracer already get disabled? */
9405 if (ftrace_is_dead()) {
9406 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9407 printk("# MAY BE MISSING FUNCTION EVENTS\n");
9408 }
9409
9410 /*
9411 * We need to stop all tracing on all CPUs to read
9412 * the next buffer. This is a bit expensive, but it is
9413 * not done often. We print out all that we can read,
9414 * and then release the locks again.
9415 */
9416
9417 while (!trace_empty(&iter)) {
9418
9419 if (!cnt)
9420 printk(KERN_TRACE "---------------------------------\n");
9421
9422 cnt++;
9423
9424 trace_iterator_reset(&iter);
9425 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9426
9427 if (trace_find_next_entry_inc(&iter) != NULL) {
9428 int ret;
9429
9430 ret = print_trace_line(&iter);
9431 if (ret != TRACE_TYPE_NO_CONSUME)
9432 trace_consume(&iter);
9433 }
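/* A long dump could otherwise trip the hard/soft lockup detectors. */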
9434 touch_nmi_watchdog();
9435
9436 trace_printk_seq(&iter.seq);
9437 }
9438
9439 if (!cnt)
9440 printk(KERN_TRACE " (ftrace buffer empty)\n");
9441 else
9442 printk(KERN_TRACE "---------------------------------\n");
9443
9444 out_enable:
9445 tr->trace_flags |= old_userobj;
9446
9447 for_each_tracing_cpu(cpu) {
9448 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9449 }
9450 atomic_dec(&dump_running);
9451 printk_nmi_direct_exit();
9452 local_irq_restore(flags);
9453 }
9454 EXPORT_SYMBOL_GPL(ftrace_dump);
9455
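/*
* Split @buf into whitespace-separated arguments and pass them to
* @createfn. An empty line is silently accepted and returns 0.
*/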
9456 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9457 {
9458 char **argv;
9459 int argc, ret;
9460
9461 argc = 0;
9462 ret = 0;
9463 argv = argv_split(GFP_KERNEL, buf, &argc);
9464 if (!argv)
9465 return -ENOMEM;
9466
9467 if (argc)
9468 ret = createfn(argc, argv);
9469
9470 argv_free(argv);
9471
9472 return ret;
9473 }
9474
9475 #define WRITE_BUFSIZE 4096
9476
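/*
* Parse commands written to a tracefs file (used by interfaces such as the
* dynamic event files). The user buffer is copied in WRITE_BUFSIZE chunks;
* each newline-terminated line is handed to @createfn, with everything after
* a '#' stripped as a comment. A single line must fit in WRITE_BUFSIZE - 2
* bytes, otherwise -EINVAL is returned.
*/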
9477 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9478 size_t count, loff_t *ppos,
9479 int (*createfn)(int, char **))
9480 {
9481 char *kbuf, *buf, *tmp;
9482 int ret = 0;
9483 size_t done = 0;
9484 size_t size;
9485
9486 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9487 if (!kbuf)
9488 return -ENOMEM;
9489
9490 while (done < count) {
9491 size = count - done;
9492
9493 if (size >= WRITE_BUFSIZE)
9494 size = WRITE_BUFSIZE - 1;
9495
9496 if (copy_from_user(kbuf, buffer + done, size)) {
9497 ret = -EFAULT;
9498 goto out;
9499 }
9500 kbuf[size] = '\0';
9501 buf = kbuf;
9502 do {
9503 tmp = strchr(buf, '\n');
9504 if (tmp) {
9505 *tmp = '\0';
9506 size = tmp - buf + 1;
9507 } else {
9508 size = strlen(buf);
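/*
* No newline in this chunk. If more user data follows and we already
* consumed at least one full line (buf != kbuf), leave the partial line
* for the next copy_from_user() pass; otherwise the single line can
* never fit in the buffer and we bail out below.
*/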
9509 if (done + size < count) {
9510 if (buf != kbuf)
9511 break;
9512 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9513 pr_warn("Line length is too long: Should be less than %d\n",
9514 WRITE_BUFSIZE - 2);
9515 ret = -EINVAL;
9516 goto out;
9517 }
9518 }
9519 done += size;
9520
9521 /* Remove comments */
9522 tmp = strchr(buf, '#');
9523
9524 if (tmp)
9525 *tmp = '\0';
9526
9527 ret = trace_run_command(buf, createfn);
9528 if (ret)
9529 goto out;
9530 buf += size;
9531
9532 } while (done < count);
9533 }
9534 ret = done;
9535
9536 out:
9537 kfree(kbuf);
9538
9539 return ret;
9540 }
9541
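/*
* Allocate and initialize the global trace array and its ring buffers.
* Called from early_trace_init() below, early in the boot process.
*/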
9542 __init static int tracer_alloc_buffers(void)
9543 {
9544 int ring_buf_size;
9545 int ret = -ENOMEM;
9546
9547
9548 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9549 pr_warn("Tracing disabled due to lockdown\n");
9550 return -EPERM;
9551 }
9552
9553 /*
9554 * Make sure we don't accidentally add more trace options
9555 * than we have bits for.
9556 */
9557 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9558
9559 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9560 goto out;
9561
9562 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9563 goto out_free_buffer_mask;
9564
9565 /* Only allocate trace_printk buffers if a trace_printk exists */
9566 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9567 /* Must be called before global_trace.buffer is allocated */
9568 trace_printk_init_buffers();
9569
9570 /* To save memory, keep the ring buffer size to its minimum */
9571 if (ring_buffer_expanded)
9572 ring_buf_size = trace_buf_size;
9573 else
9574 ring_buf_size = 1;
9575
9576 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9577 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9578
9579 raw_spin_lock_init(&global_trace.start_lock);
9580
9581 /*
9582 * The prepare callback allocates some memory for the ring buffer. We
9583 * don't free the buffer if the CPU goes down. If we were to free
9584 * the buffer, then the user would lose any trace that was in the
9585 * buffer. The memory will be removed once the "instance" is removed.
9586 */
9587 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9588 "trace/RB:prepare", trace_rb_cpu_prepare,
9589 NULL);
9590 if (ret < 0)
9591 goto out_free_cpumask;
9592 /* Used for event triggers */
9593 ret = -ENOMEM;
9594 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9595 if (!temp_buffer)
9596 goto out_rm_hp_state;
9597
9598 if (trace_create_savedcmd() < 0)
9599 goto out_free_temp_buffer;
9600
9601 /* TODO: make the number of buffers hot pluggable with CPUS */
9602 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9603 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9604 goto out_free_savedcmd;
9605 }
9606
9607 if (global_trace.buffer_disabled)
9608 tracing_off();
9609
9610 if (trace_boot_clock) {
9611 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9612 if (ret < 0)
9613 pr_warn("Trace clock %s not defined, going back to default\n",
9614 trace_boot_clock);
9615 }
9616
9617 /*
9618 * register_tracer() might reference current_trace, so it
9619 * needs to be set before we register anything. This is
9620 * just a bootstrap of current_trace anyway.
9621 */
9622 global_trace.current_trace = &nop_trace;
9623
9624 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9625
9626 ftrace_init_global_array_ops(&global_trace);
9627
9628 init_trace_flags_index(&global_trace);
9629
9630 register_tracer(&nop_trace);
9631
9632 /* Function tracing may start here (via kernel command line) */
9633 init_function_trace();
9634
9635 /* All seems OK, enable tracing */
9636 tracing_disabled = 0;
9637
9638 atomic_notifier_chain_register(&panic_notifier_list,
9639 &trace_panic_notifier);
9640
9641 register_die_notifier(&trace_die_notifier);
9642
9643 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9644
9645 INIT_LIST_HEAD(&global_trace.systems);
9646 INIT_LIST_HEAD(&global_trace.events);
9647 INIT_LIST_HEAD(&global_trace.hist_vars);
9648 INIT_LIST_HEAD(&global_trace.err_log);
9649 list_add(&global_trace.list, &ftrace_trace_arrays);
9650
9651 apply_trace_boot_options();
9652
9653 register_snapshot_cmd();
9654
9655 return 0;
9656
9657 out_free_savedcmd:
9658 free_saved_cmdlines_buffer(savedcmd);
9659 out_free_temp_buffer:
9660 ring_buffer_free(temp_buffer);
9661 out_rm_hp_state:
9662 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9663 out_free_cpumask:
9664 free_cpumask_var(global_trace.tracing_cpumask);
9665 out_free_buffer_mask:
9666 free_cpumask_var(tracing_buffer_mask);
9667 out:
9668 return ret;
9669 }
9670
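/*
* early_trace_init() is called early during kernel start-up (from
* start_kernel()), so that trace_printk() and boot-time tracing can be
* used as soon as possible.
*/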
9671 void __init early_trace_init(void)
9672 {
9673 if (tracepoint_printk) {
9674 tracepoint_print_iter =
9675 kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9676 if (MEM_FAIL(!tracepoint_print_iter,
9677 "Failed to allocate trace iterator\n"))
9678 tracepoint_printk = 0;
9679 else
9680 static_key_enable(&tracepoint_printk_key.key);
9681 }
9682 tracer_alloc_buffers();
9683 }
9684
9685 void __init trace_init(void)
9686 {
9687 trace_event_init();
9688 }
9689
9690 __init static int clear_boot_tracer(void)
9691 {
9692 /*
9693 * The default bootup tracer name lives in an init section and will
9694 * be freed after boot. This function runs at late_initcall time:
9695 * if the boot tracer was never registered, clear the pointer so that
9696 * a later tracer registration does not access the init memory that
9697 * is about to be freed.
9698 */
9699 if (!default_bootup_tracer)
9700 return 0;
9701
9702 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9703 default_bootup_tracer);
9704 default_bootup_tracer = NULL;
9705
9706 return 0;
9707 }
9708
9709 fs_initcall(tracer_init_tracefs);
9710 late_initcall_sync(clear_boot_tracer);
9711
9712 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9713 __init static int tracing_set_default_clock(void)
9714 {
9715 /* sched_clock_stable() is determined in late_initcall */
9716 if (!trace_boot_clock && !sched_clock_stable()) {
9717 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9718 pr_warn("Can not set tracing clock due to lockdown\n");
9719 return -EPERM;
9720 }
9721
9722 printk(KERN_WARNING
9723 "Unstable clock detected, switching default tracing clock to \"global\"\n"
9724 "If you want to keep using the local clock, then add:\n"
9725 " \"trace_clock=local\"\n"
9726 "on the kernel command line\n");
9727 tracing_set_clock(&global_trace, "global");
9728 }
9729
9730 return 0;
9731 }
9732 late_initcall_sync(tracing_set_default_clock);
9733 #endif
9734