1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56 * On boot up, the ring buffer is set to the minimum size, so that
57 * we do not waste memory on systems that are not using tracing.
58 */
59 bool ring_buffer_expanded;
60
61 /*
62 * We need to change this state when a selftest is running.
63 * A selftest will peek into the ring buffer to count the
64 * entries inserted during the selftest, although concurrent
65 * insertions into the ring buffer, such as trace_printk(), could
66 * occur at the same time, giving false positive or negative results.
67 */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71 * If boot-time tracing including tracers/events via kernel cmdline
72 * is running, we do not want to run SELFTEST.
73 */
74 bool __read_mostly tracing_selftest_disabled;
75
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
78 {
79 if (!tracing_selftest_disabled) {
80 tracing_selftest_disabled = true;
81 pr_info("Ftrace startup test is disabled due to %s\n", reason);
82 }
83 }
84 #endif
85
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93 { }
94 };
95
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99 return 0;
100 }
101
102 /*
103 * To prevent the comm cache from being overwritten when no
104 * tracing is active, only save the comm when a trace event
105 * occurred.
106 */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108
109 /*
110 * Kill all tracing for good (never come back).
111 * It is initialized to 1 but will turn to zero if the initialization
112 * of the tracer is successful. But that is the only place that sets
113 * this back to zero.
114 */
115 static int tracing_disabled = 1;
116
117 cpumask_var_t __read_mostly tracing_buffer_mask;
118
119 /*
120 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121 *
122 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123 * is set, then ftrace_dump is called. This will output the contents
124 * of the ftrace buffers to the console. This is very useful for
125 * capturing traces that lead to crashes and outputting them to a
126 * serial console.
127 *
128 * It is off by default, but can be enabled either by specifying
129 * "ftrace_dump_on_oops" on the kernel command line, or by setting
130 * /proc/sys/kernel/ftrace_dump_on_oops.
131 * Set it to 1 to dump the buffers of all CPUs.
132 * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
133 */
134
135 enum ftrace_dump_mode ftrace_dump_on_oops;
136
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143 struct module *mod;
144 unsigned long length;
145 };
146
147 union trace_eval_map_item;
148
149 struct trace_eval_map_tail {
150 /*
151 * "end" is first and points to NULL as it must be different
152 * than "mod" or "eval_string"
153 */
154 union trace_eval_map_item *next;
155 const char *end; /* points to NULL */
156 };
157
158 static DEFINE_MUTEX(trace_eval_mutex);
159
160 /*
161 * The trace_eval_maps are saved in an array with two extra elements,
162 * one at the beginning, and one at the end. The beginning item contains
163 * the count of the saved maps (head.length), and the module they
164 * belong to if not built in (head.mod). The ending item contains a
165 * pointer to the next array of saved eval_map items.
166 */
167 union trace_eval_map_item {
168 struct trace_eval_map map;
169 struct trace_eval_map_head head;
170 struct trace_eval_map_tail tail;
171 };
172
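/*
 * Illustrative layout (sketch, inferred from the comment above) of one
 * such saved array:
 *
 *	[ head | map 0 | map 1 | ... | map N-1 | tail ]
 *
 * where head.length == N, head.mod points to the owning module (or is
 * NULL for built-in maps), and tail.next points to the next saved array
 * of eval_map items (or is NULL for the last one).
 */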
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178 struct trace_buffer *buffer,
179 unsigned long flags, int pc);
180
181 #define MAX_TRACER_SIZE 100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184
185 static bool allocate_snapshot;
186
187 static int __init set_cmdline_ftrace(char *str)
188 {
189 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
190 default_bootup_tracer = bootup_tracer_buf;
191 /* We are using ftrace early, expand it */
192 ring_buffer_expanded = true;
193 return 1;
194 }
195 __setup("ftrace=", set_cmdline_ftrace);
196
197 static int __init set_ftrace_dump_on_oops(char *str)
198 {
199 if (*str++ != '=' || !*str) {
200 ftrace_dump_on_oops = DUMP_ALL;
201 return 1;
202 }
203
204 if (!strcmp("orig_cpu", str)) {
205 ftrace_dump_on_oops = DUMP_ORIG;
206 return 1;
207 }
208
209 return 0;
210 }
211 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
212
213 static int __init stop_trace_on_warning(char *str)
214 {
215 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
216 __disable_trace_on_warning = 1;
217 return 1;
218 }
219 __setup("traceoff_on_warning", stop_trace_on_warning);
220
221 static int __init boot_alloc_snapshot(char *str)
222 {
223 allocate_snapshot = true;
224 /* We also need the main ring buffer expanded */
225 ring_buffer_expanded = true;
226 return 1;
227 }
228 __setup("alloc_snapshot", boot_alloc_snapshot);
229
230
231 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
232
233 static int __init set_trace_boot_options(char *str)
234 {
235 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
236 return 1;
237 }
238 __setup("trace_options=", set_trace_boot_options);
239
240 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
241 static char *trace_boot_clock __initdata;
242
243 static int __init set_trace_boot_clock(char *str)
244 {
245 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
246 trace_boot_clock = trace_boot_clock_buf;
247 return 1;
248 }
249 __setup("trace_clock=", set_trace_boot_clock);
250
251 static int __init set_tracepoint_printk(char *str)
252 {
253 /* Ignore the "tp_printk_stop_on_boot" param */
254 if (*str == '_')
255 return 0;
256
257 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
258 tracepoint_printk = 1;
259 return 1;
260 }
261 __setup("tp_printk", set_tracepoint_printk);
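/*
 * Example (illustrative only): the boot parameters handled above can be
 * combined on the kernel command line, for instance:
 *
 *	ftrace=function_graph trace_options=sym-addr trace_clock=global
 *	traceoff_on_warning alloc_snapshot tp_printk
 *	ftrace_dump_on_oops=orig_cpu
 *
 * The option and clock names are the same ones later exposed through
 * the tracefs files.
 */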
262
263 unsigned long long ns2usecs(u64 nsec)
264 {
265 nsec += 500;
266 do_div(nsec, 1000);
267 return nsec;
268 }
269
270 static void
271 trace_process_export(struct trace_export *export,
272 struct ring_buffer_event *event, int flag)
273 {
274 struct trace_entry *entry;
275 unsigned int size = 0;
276
277 if (export->flags & flag) {
278 entry = ring_buffer_event_data(event);
279 size = ring_buffer_event_length(event);
280 export->write(export, entry, size);
281 }
282 }
283
284 static DEFINE_MUTEX(ftrace_export_lock);
285
286 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
287
288 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
289 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
290 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
291
292 static inline void ftrace_exports_enable(struct trace_export *export)
293 {
294 if (export->flags & TRACE_EXPORT_FUNCTION)
295 static_branch_inc(&trace_function_exports_enabled);
296
297 if (export->flags & TRACE_EXPORT_EVENT)
298 static_branch_inc(&trace_event_exports_enabled);
299
300 if (export->flags & TRACE_EXPORT_MARKER)
301 static_branch_inc(&trace_marker_exports_enabled);
302 }
303
304 static inline void ftrace_exports_disable(struct trace_export *export)
305 {
306 if (export->flags & TRACE_EXPORT_FUNCTION)
307 static_branch_dec(&trace_function_exports_enabled);
308
309 if (export->flags & TRACE_EXPORT_EVENT)
310 static_branch_dec(&trace_event_exports_enabled);
311
312 if (export->flags & TRACE_EXPORT_MARKER)
313 static_branch_dec(&trace_marker_exports_enabled);
314 }
315
316 static void ftrace_exports(struct ring_buffer_event *event, int flag)
317 {
318 struct trace_export *export;
319
320 preempt_disable_notrace();
321
322 export = rcu_dereference_raw_check(ftrace_exports_list);
323 while (export) {
324 trace_process_export(export, event, flag);
325 export = rcu_dereference_raw_check(export->next);
326 }
327
328 preempt_enable_notrace();
329 }
330
331 static inline void
332 add_trace_export(struct trace_export **list, struct trace_export *export)
333 {
334 rcu_assign_pointer(export->next, *list);
335 /*
336 * We are entering export into the list but another
337 * CPU might be walking that list. We need to make sure
338 * the export->next pointer is valid before another CPU sees
339 * the export pointer included into the list.
340 */
341 rcu_assign_pointer(*list, export);
342 }
343
344 static inline int
345 rm_trace_export(struct trace_export **list, struct trace_export *export)
346 {
347 struct trace_export **p;
348
349 for (p = list; *p != NULL; p = &(*p)->next)
350 if (*p == export)
351 break;
352
353 if (*p != export)
354 return -1;
355
356 rcu_assign_pointer(*p, (*p)->next);
357
358 return 0;
359 }
360
361 static inline void
362 add_ftrace_export(struct trace_export **list, struct trace_export *export)
363 {
364 ftrace_exports_enable(export);
365
366 add_trace_export(list, export);
367 }
368
369 static inline int
370 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
371 {
372 int ret;
373
374 ret = rm_trace_export(list, export);
375 ftrace_exports_disable(export);
376
377 return ret;
378 }
379
380 int register_ftrace_export(struct trace_export *export)
381 {
382 if (WARN_ON_ONCE(!export->write))
383 return -1;
384
385 mutex_lock(&ftrace_export_lock);
386
387 add_ftrace_export(&ftrace_exports_list, export);
388
389 mutex_unlock(&ftrace_export_lock);
390
391 return 0;
392 }
393 EXPORT_SYMBOL_GPL(register_ftrace_export);
394
395 int unregister_ftrace_export(struct trace_export *export)
396 {
397 int ret;
398
399 mutex_lock(&ftrace_export_lock);
400
401 ret = rm_ftrace_export(&ftrace_exports_list, export);
402
403 mutex_unlock(&ftrace_export_lock);
404
405 return ret;
406 }
407 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
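/*
 * Example (sketch, not built here): a module can mirror trace data to an
 * external sink by registering a trace_export.  The names "my_export" and
 * "my_export_write" are hypothetical.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// forward @size bytes of the entry to some external channel
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */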
408
409 /* trace_flags holds trace_options default values */
410 #define TRACE_DEFAULT_FLAGS \
411 (FUNCTION_DEFAULT_FLAGS | \
412 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
413 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
414 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
415 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
416
417 /* trace_options that are only supported by global_trace */
418 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
419 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
420
421 /* trace_flags that are default zero for instances */
422 #define ZEROED_TRACE_FLAGS \
423 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
424
425 /*
426 * The global_trace is the descriptor that holds the top-level tracing
427 * buffers for the live tracing.
428 */
429 static struct trace_array global_trace = {
430 .trace_flags = TRACE_DEFAULT_FLAGS,
431 };
432
433 LIST_HEAD(ftrace_trace_arrays);
434
435 int trace_array_get(struct trace_array *this_tr)
436 {
437 struct trace_array *tr;
438 int ret = -ENODEV;
439
440 mutex_lock(&trace_types_lock);
441 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
442 if (tr == this_tr) {
443 tr->ref++;
444 ret = 0;
445 break;
446 }
447 }
448 mutex_unlock(&trace_types_lock);
449
450 return ret;
451 }
452
453 static void __trace_array_put(struct trace_array *this_tr)
454 {
455 WARN_ON(!this_tr->ref);
456 this_tr->ref--;
457 }
458
459 /**
460 * trace_array_put - Decrement the reference counter for this trace array.
461 *
462 * NOTE: Use this when we no longer need the trace array returned by
463 * trace_array_get_by_name(). This ensures the trace array can be later
464 * destroyed.
465 *
466 */
467 void trace_array_put(struct trace_array *this_tr)
468 {
469 if (!this_tr)
470 return;
471
472 mutex_lock(&trace_types_lock);
473 __trace_array_put(this_tr);
474 mutex_unlock(&trace_types_lock);
475 }
476 EXPORT_SYMBOL_GPL(trace_array_put);
477
478 int tracing_check_open_get_tr(struct trace_array *tr)
479 {
480 int ret;
481
482 ret = security_locked_down(LOCKDOWN_TRACEFS);
483 if (ret)
484 return ret;
485
486 if (tracing_disabled)
487 return -ENODEV;
488
489 if (tr && trace_array_get(tr) < 0)
490 return -ENODEV;
491
492 return 0;
493 }
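/*
 * Example (sketch): tracefs open handlers in this file pair this check
 * with trace_array_put() on the release path, roughly:
 *
 *	static int my_open(struct inode *inode, struct file *filp)	// hypothetical
 *	{
 *		struct trace_array *tr = inode->i_private;
 *		int ret;
 *
 *		ret = tracing_check_open_get_tr(tr);
 *		if (ret)
 *			return ret;
 *		// ... set up the file; the matching ->release() handler
 *		// drops the reference with trace_array_put() ...
 *		return 0;
 *	}
 */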
494
495 int call_filter_check_discard(struct trace_event_call *call, void *rec,
496 struct trace_buffer *buffer,
497 struct ring_buffer_event *event)
498 {
499 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
500 !filter_match_preds(call->filter, rec)) {
501 __trace_event_discard_commit(buffer, event);
502 return 1;
503 }
504
505 return 0;
506 }
507
508 void trace_free_pid_list(struct trace_pid_list *pid_list)
509 {
510 vfree(pid_list->pids);
511 kfree(pid_list);
512 }
513
514 /**
515 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
516 * @filtered_pids: The list of pids to check
517 * @search_pid: The PID to find in @filtered_pids
518 *
519 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
520 */
521 bool
522 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
523 {
524 /*
525 * If pid_max changed after filtered_pids was created, we
526 * by default ignore all pids greater than the previous pid_max.
527 */
528 if (search_pid >= filtered_pids->pid_max)
529 return false;
530
531 return test_bit(search_pid, filtered_pids->pids);
532 }
533
534 /**
535 * trace_ignore_this_task - should a task be ignored for tracing
536 * @filtered_pids: The list of pids to check
537 * @task: The task that should be ignored if not filtered
538 *
539 * Checks if @task should be traced or not from @filtered_pids.
540 * Returns true if @task should *NOT* be traced.
541 * Returns false if @task should be traced.
542 */
543 bool
544 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
545 struct trace_pid_list *filtered_no_pids,
546 struct task_struct *task)
547 {
548 /*
549 * If filtered_no_pids is not empty, and the task's pid is listed
550 * in filtered_no_pids, then return true.
551 * Otherwise, if filtered_pids is empty, that means we can
552 * trace all tasks. If it has content, then only trace pids
553 * within filtered_pids.
554 */
555
556 return (filtered_pids &&
557 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
558 (filtered_no_pids &&
559 trace_find_filtered_pid(filtered_no_pids, task->pid));
560 }
561
562 /**
563 * trace_filter_add_remove_task - Add or remove a task from a pid_list
564 * @pid_list: The list to modify
565 * @self: The current task for fork or NULL for exit
566 * @task: The task to add or remove
567 *
568 * If adding a task, if @self is defined, the task is only added if @self
569 * is also included in @pid_list. This happens on fork and tasks should
570 * only be added when the parent is listed. If @self is NULL, then the
571 * @task pid will be removed from the list, which would happen on exit
572 * of a task.
573 */
574 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
575 struct task_struct *self,
576 struct task_struct *task)
577 {
578 if (!pid_list)
579 return;
580
581 /* For forks, we only add if the forking task is listed */
582 if (self) {
583 if (!trace_find_filtered_pid(pid_list, self->pid))
584 return;
585 }
586
587 /* Sorry, but we don't support pid_max changing after setting */
588 if (task->pid >= pid_list->pid_max)
589 return;
590
591 /* "self" is set for forks, and NULL for exits */
592 if (self)
593 set_bit(task->pid, pid_list->pids);
594 else
595 clear_bit(task->pid, pid_list->pids);
596 }
597
598 /**
599 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
600 * @pid_list: The pid list to show
601 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
602 * @pos: The position of the file
603 *
604 * This is used by the seq_file "next" operation to iterate the pids
605 * listed in a trace_pid_list structure.
606 *
607 * Returns the pid+1 as we want to display pid of zero, but NULL would
608 * stop the iteration.
609 */
610 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
611 {
612 unsigned long pid = (unsigned long)v;
613
614 (*pos)++;
615
616 /* pid is already +1 of the actual previous bit */
617 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
618
619 /* Return pid + 1 to allow zero to be represented */
620 if (pid < pid_list->pid_max)
621 return (void *)(pid + 1);
622
623 return NULL;
624 }
625
626 /**
627 * trace_pid_start - Used for seq_file to start reading pid lists
628 * @pid_list: The pid list to show
629 * @pos: The position of the file
630 *
631 * This is used by seq_file "start" operation to start the iteration
632 * of listing pids.
633 *
634 * Returns the pid+1 as we want to display pid of zero, but NULL would
635 * stop the iteration.
636 */
637 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
638 {
639 unsigned long pid;
640 loff_t l = 0;
641
642 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
643 if (pid >= pid_list->pid_max)
644 return NULL;
645
646 /* Return pid + 1 so that zero can be the exit value */
647 for (pid++; pid && l < *pos;
648 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
649 ;
650 return (void *)pid;
651 }
652
653 /**
654 * trace_pid_show - show the current pid in seq_file processing
655 * @m: The seq_file structure to write into
656 * @v: A void pointer of the pid (+1) value to display
657 *
658 * Can be directly used by seq_file operations to display the current
659 * pid value.
660 */
661 int trace_pid_show(struct seq_file *m, void *v)
662 {
663 unsigned long pid = (unsigned long)v - 1;
664
665 seq_printf(m, "%lu\n", pid);
666 return 0;
667 }
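/*
 * Example (sketch): trace_pid_start/next/show are meant to be wired into
 * a seq_file; the wrapper names and the pid_list lookup below are
 * hypothetical (real users, such as the event pid filter files, add their
 * own locking around the pid_list):
 *
 *	static void *my_pid_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *my_pid_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= my_pid_start,
 *		.next	= my_pid_next,
 *		.stop	= my_pid_stop,		// hypothetical
 *		.show	= trace_pid_show,
 *	};
 */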
668
669 /* 128 should be much more than enough */
670 #define PID_BUF_SIZE 127
671
672 int trace_pid_write(struct trace_pid_list *filtered_pids,
673 struct trace_pid_list **new_pid_list,
674 const char __user *ubuf, size_t cnt)
675 {
676 struct trace_pid_list *pid_list;
677 struct trace_parser parser;
678 unsigned long val;
679 int nr_pids = 0;
680 ssize_t read = 0;
681 ssize_t ret = 0;
682 loff_t pos;
683 pid_t pid;
684
685 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
686 return -ENOMEM;
687
688 /*
689 * Always recreate a new array. The write is an all or nothing
690 * operation. Always create a new array when adding new pids by
691 * the user. If the operation fails, then the current list is
692 * not modified.
693 */
694 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
695 if (!pid_list) {
696 trace_parser_put(&parser);
697 return -ENOMEM;
698 }
699
700 pid_list->pid_max = READ_ONCE(pid_max);
701
702 /* Only truncating will shrink pid_max */
703 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
704 pid_list->pid_max = filtered_pids->pid_max;
705
706 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
707 if (!pid_list->pids) {
708 trace_parser_put(&parser);
709 kfree(pid_list);
710 return -ENOMEM;
711 }
712
713 if (filtered_pids) {
714 /* copy the current bits to the new max */
715 for_each_set_bit(pid, filtered_pids->pids,
716 filtered_pids->pid_max) {
717 set_bit(pid, pid_list->pids);
718 nr_pids++;
719 }
720 }
721
722 while (cnt > 0) {
723
724 pos = 0;
725
726 ret = trace_get_user(&parser, ubuf, cnt, &pos);
727 if (ret < 0 || !trace_parser_loaded(&parser))
728 break;
729
730 read += ret;
731 ubuf += ret;
732 cnt -= ret;
733
734 ret = -EINVAL;
735 if (kstrtoul(parser.buffer, 0, &val))
736 break;
737 if (val >= pid_list->pid_max)
738 break;
739
740 pid = (pid_t)val;
741
742 set_bit(pid, pid_list->pids);
743 nr_pids++;
744
745 trace_parser_clear(&parser);
746 ret = 0;
747 }
748 trace_parser_put(&parser);
749
750 if (ret < 0) {
751 trace_free_pid_list(pid_list);
752 return ret;
753 }
754
755 if (!nr_pids) {
756 /* Cleared the list of pids */
757 trace_free_pid_list(pid_list);
758 read = ret;
759 pid_list = NULL;
760 }
761
762 *new_pid_list = pid_list;
763
764 return read;
765 }
766
767 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
768 {
769 u64 ts;
770
771 /* Early boot up does not have a buffer yet */
772 if (!buf->buffer)
773 return trace_clock_local();
774
775 ts = ring_buffer_time_stamp(buf->buffer, cpu);
776 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
777
778 return ts;
779 }
780
781 u64 ftrace_now(int cpu)
782 {
783 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
784 }
785
786 /**
787 * tracing_is_enabled - Show if global_trace has been enabled or not
788 *
789 * Shows if the global trace has been enabled or not. It uses the
790 * mirror flag "buffer_disabled" to be used in fast paths such as for
791 * the irqsoff tracer. But it may be inaccurate due to races. If you
792 * need to know the accurate state, use tracing_is_on() which is a little
793 * slower, but accurate.
794 */
795 int tracing_is_enabled(void)
796 {
797 /*
798 * For quick access (irqsoff uses this in fast path), just
799 * return the mirror variable of the state of the ring buffer.
800 * It's a little racy, but we don't really care.
801 */
802 smp_rmb();
803 return !global_trace.buffer_disabled;
804 }
805
806 /*
807 * trace_buf_size is the size in bytes that is allocated
808 * for a buffer. Note, the number of bytes is always rounded
809 * to page size.
810 *
811 * This number is purposely set to a low number of 16384.
812 * If a dump on oops happens, it is much appreciated not to have
813 * to wait for all that output. In any case, this is configurable
814 * at both boot time and run time.
815 */
816 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
817
818 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
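/*
 * Example (illustrative): the size can be overridden at boot with e.g.
 * "trace_buf_size=1M" (see set_buf_size() below), or at run time via the
 * per-instance "buffer_size_kb" file in tracefs.
 */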
819
820 /* trace_types holds a link list of available tracers. */
821 static struct tracer *trace_types __read_mostly;
822
823 /*
824 * trace_types_lock is used to protect the trace_types list.
825 */
826 DEFINE_MUTEX(trace_types_lock);
827
828 /*
829 * Serialize access to the ring buffer.
830 *
831 * The ring buffer serializes readers, but that is only low level
832 * protection. The validity of the events (returned by
833 * ring_buffer_peek() etc.) is not protected by the ring buffer.
834 *
835 * The content of events may become garbage if we allow other processes
836 * to consume these events concurrently:
837 * A) the page of the consumed events may become a normal page
838 * (not a reader page) in the ring buffer, and this page will be
839 * rewritten by the event producer.
840 * B) the page of the consumed events may become a page for
841 * splice_read, and this page will be returned to the system.
842 *
843 * These primitives allow multiple processes to access different per-cpu
844 * ring buffers concurrently.
845 *
846 * These primitives don't distinguish read-only and read-consume access.
847 * Multiple read-only accesses are also serialized.
848 */
849
850 #ifdef CONFIG_SMP
851 static DECLARE_RWSEM(all_cpu_access_lock);
852 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
853
854 static inline void trace_access_lock(int cpu)
855 {
856 if (cpu == RING_BUFFER_ALL_CPUS) {
857 /* gain it for accessing the whole ring buffer. */
858 down_write(&all_cpu_access_lock);
859 } else {
860 /* gain it for accessing a cpu ring buffer. */
861
862 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
863 down_read(&all_cpu_access_lock);
864
865 /* Secondly block other access to this @cpu ring buffer. */
866 mutex_lock(&per_cpu(cpu_access_lock, cpu));
867 }
868 }
869
870 static inline void trace_access_unlock(int cpu)
871 {
872 if (cpu == RING_BUFFER_ALL_CPUS) {
873 up_write(&all_cpu_access_lock);
874 } else {
875 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
876 up_read(&all_cpu_access_lock);
877 }
878 }
879
880 static inline void trace_access_lock_init(void)
881 {
882 int cpu;
883
884 for_each_possible_cpu(cpu)
885 mutex_init(&per_cpu(cpu_access_lock, cpu));
886 }
887
888 #else
889
890 static DEFINE_MUTEX(access_lock);
891
892 static inline void trace_access_lock(int cpu)
893 {
894 (void)cpu;
895 mutex_lock(&access_lock);
896 }
897
898 static inline void trace_access_unlock(int cpu)
899 {
900 (void)cpu;
901 mutex_unlock(&access_lock);
902 }
903
904 static inline void trace_access_lock_init(void)
905 {
906 }
907
908 #endif
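/*
 * Example (sketch): readers in this file bracket buffer consumption with
 *
 *	trace_access_lock(cpu);
 *	... read/consume the ring buffer of @cpu ...
 *	trace_access_unlock(cpu);
 *
 * and pass RING_BUFFER_ALL_CPUS as @cpu when every buffer needs to be
 * accessed exclusively.
 */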
909
910 #ifdef CONFIG_STACKTRACE
911 static void __ftrace_trace_stack(struct trace_buffer *buffer,
912 unsigned long flags,
913 int skip, int pc, struct pt_regs *regs);
914 static inline void ftrace_trace_stack(struct trace_array *tr,
915 struct trace_buffer *buffer,
916 unsigned long flags,
917 int skip, int pc, struct pt_regs *regs);
918
919 #else
920 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
921 unsigned long flags,
922 int skip, int pc, struct pt_regs *regs)
923 {
924 }
925 static inline void ftrace_trace_stack(struct trace_array *tr,
926 struct trace_buffer *buffer,
927 unsigned long flags,
928 int skip, int pc, struct pt_regs *regs)
929 {
930 }
931
932 #endif
933
934 static __always_inline void
935 trace_event_setup(struct ring_buffer_event *event,
936 int type, unsigned long flags, int pc)
937 {
938 struct trace_entry *ent = ring_buffer_event_data(event);
939
940 tracing_generic_entry_update(ent, type, flags, pc);
941 }
942
943 static __always_inline struct ring_buffer_event *
944 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
945 int type,
946 unsigned long len,
947 unsigned long flags, int pc)
948 {
949 struct ring_buffer_event *event;
950
951 event = ring_buffer_lock_reserve(buffer, len);
952 if (event != NULL)
953 trace_event_setup(event, type, flags, pc);
954
955 return event;
956 }
957
958 void tracer_tracing_on(struct trace_array *tr)
959 {
960 if (tr->array_buffer.buffer)
961 ring_buffer_record_on(tr->array_buffer.buffer);
962 /*
963 * This flag is looked at when buffers haven't been allocated
964 * yet, or by some tracers (like irqsoff), that just want to
965 * know if the ring buffer has been disabled, but it can handle
966 * races where it gets disabled but we still do a record.
967 * As the check is in the fast path of the tracers, it is more
968 * important to be fast than accurate.
969 */
970 tr->buffer_disabled = 0;
971 /* Make the flag seen by readers */
972 smp_wmb();
973 }
974
975 /**
976 * tracing_on - enable tracing buffers
977 *
978 * This function enables tracing buffers that may have been
979 * disabled with tracing_off.
980 */
981 void tracing_on(void)
982 {
983 tracer_tracing_on(&global_trace);
984 }
985 EXPORT_SYMBOL_GPL(tracing_on);
986
987
988 static __always_inline void
989 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
990 {
991 __this_cpu_write(trace_taskinfo_save, true);
992
993 /* If this is the temp buffer, we need to commit fully */
994 if (this_cpu_read(trace_buffered_event) == event) {
995 /* Length is in event->array[0] */
996 ring_buffer_write(buffer, event->array[0], &event->array[1]);
997 /* Release the temp buffer */
998 this_cpu_dec(trace_buffered_event_cnt);
999 } else
1000 ring_buffer_unlock_commit(buffer, event);
1001 }
1002
1003 /**
1004 * __trace_puts - write a constant string into the trace buffer.
1005 * @ip: The address of the caller
1006 * @str: The constant string to write
1007 * @size: The size of the string.
1008 */
1009 int __trace_puts(unsigned long ip, const char *str, int size)
1010 {
1011 struct ring_buffer_event *event;
1012 struct trace_buffer *buffer;
1013 struct print_entry *entry;
1014 unsigned long irq_flags;
1015 int alloc;
1016 int pc;
1017
1018 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1019 return 0;
1020
1021 pc = preempt_count();
1022
1023 if (unlikely(tracing_selftest_running || tracing_disabled))
1024 return 0;
1025
1026 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1027
1028 local_save_flags(irq_flags);
1029 buffer = global_trace.array_buffer.buffer;
1030 ring_buffer_nest_start(buffer);
1031 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1032 irq_flags, pc);
1033 if (!event) {
1034 size = 0;
1035 goto out;
1036 }
1037
1038 entry = ring_buffer_event_data(event);
1039 entry->ip = ip;
1040
1041 memcpy(&entry->buf, str, size);
1042
1043 /* Add a newline if necessary */
1044 if (entry->buf[size - 1] != '\n') {
1045 entry->buf[size] = '\n';
1046 entry->buf[size + 1] = '\0';
1047 } else
1048 entry->buf[size] = '\0';
1049
1050 __buffer_unlock_commit(buffer, event);
1051 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1052 out:
1053 ring_buffer_nest_end(buffer);
1054 return size;
1055 }
1056 EXPORT_SYMBOL_GPL(__trace_puts);
1057
1058 /**
1059 * __trace_bputs - write the pointer to a constant string into trace buffer
1060 * @ip: The address of the caller
1061 * @str: The constant string to write to the buffer to
1062 */
1063 int __trace_bputs(unsigned long ip, const char *str)
1064 {
1065 struct ring_buffer_event *event;
1066 struct trace_buffer *buffer;
1067 struct bputs_entry *entry;
1068 unsigned long irq_flags;
1069 int size = sizeof(struct bputs_entry);
1070 int ret = 0;
1071 int pc;
1072
1073 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1074 return 0;
1075
1076 pc = preempt_count();
1077
1078 if (unlikely(tracing_selftest_running || tracing_disabled))
1079 return 0;
1080
1081 local_save_flags(irq_flags);
1082 buffer = global_trace.array_buffer.buffer;
1083
1084 ring_buffer_nest_start(buffer);
1085 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1086 irq_flags, pc);
1087 if (!event)
1088 goto out;
1089
1090 entry = ring_buffer_event_data(event);
1091 entry->ip = ip;
1092 entry->str = str;
1093
1094 __buffer_unlock_commit(buffer, event);
1095 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1096
1097 ret = 1;
1098 out:
1099 ring_buffer_nest_end(buffer);
1100 return ret;
1101 }
1102 EXPORT_SYMBOL_GPL(__trace_bputs);
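/*
 * Example (illustrative): callers normally use the trace_puts() macro
 * rather than calling the two functions above directly:
 *
 *	trace_puts("reached the slow path\n");
 *
 * The macro is expected to resolve to __trace_bputs() for strings the
 * compiler can prove are constant, and to __trace_puts() otherwise.
 */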
1103
1104 #ifdef CONFIG_TRACER_SNAPSHOT
1105 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1106 void *cond_data)
1107 {
1108 struct tracer *tracer = tr->current_trace;
1109 unsigned long flags;
1110
1111 if (in_nmi()) {
1112 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1113 internal_trace_puts("*** snapshot is being ignored ***\n");
1114 return;
1115 }
1116
1117 if (!tr->allocated_snapshot) {
1118 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1119 internal_trace_puts("*** stopping trace here! ***\n");
1120 tracing_off();
1121 return;
1122 }
1123
1124 /* Note, snapshot can not be used when the tracer uses it */
1125 if (tracer->use_max_tr) {
1126 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1127 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1128 return;
1129 }
1130
1131 local_irq_save(flags);
1132 update_max_tr(tr, current, smp_processor_id(), cond_data);
1133 local_irq_restore(flags);
1134 }
1135
1136 void tracing_snapshot_instance(struct trace_array *tr)
1137 {
1138 tracing_snapshot_instance_cond(tr, NULL);
1139 }
1140
1141 /**
1142 * tracing_snapshot - take a snapshot of the current buffer.
1143 *
1144 * This causes a swap between the snapshot buffer and the current live
1145 * tracing buffer. You can use this to take snapshots of the live
1146 * trace when some condition is triggered, but continue to trace.
1147 *
1148 * Note, make sure to allocate the snapshot with either
1149 * a tracing_snapshot_alloc(), or by doing it manually
1150 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1151 *
1152 * If the snapshot buffer is not allocated, it will stop tracing,
1153 * basically making a permanent snapshot.
1154 */
1155 void tracing_snapshot(void)
1156 {
1157 struct trace_array *tr = &global_trace;
1158
1159 tracing_snapshot_instance(tr);
1160 }
1161 EXPORT_SYMBOL_GPL(tracing_snapshot);
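/*
 * Example (sketch): a debugging session could allocate the snapshot
 * buffer up front (e.g. with the "alloc_snapshot" boot parameter or
 * tracing_snapshot_alloc()) and then call
 *
 *	tracing_snapshot();
 *
 * from the code path of interest to preserve the trace leading up to it
 * while normal tracing continues.
 */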
1162
1163 /**
1164 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1165 * @tr: The tracing instance to snapshot
1166 * @cond_data: The data to be tested conditionally, and possibly saved
1167 *
1168 * This is the same as tracing_snapshot() except that the snapshot is
1169 * conditional - the snapshot will only happen if the
1170 * cond_snapshot.update() implementation receiving the cond_data
1171 * returns true, which means that the trace array's cond_snapshot
1172 * update() operation used the cond_data to determine whether the
1173 * snapshot should be taken, and if it was, presumably saved it along
1174 * with the snapshot.
1175 */
1176 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1177 {
1178 tracing_snapshot_instance_cond(tr, cond_data);
1179 }
1180 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1181
1182 /**
1183 * tracing_snapshot_cond_data - get the user data associated with a snapshot
1184 * @tr: The tracing instance
1185 *
1186 * When the user enables a conditional snapshot using
1187 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1188 * with the snapshot. This accessor is used to retrieve it.
1189 *
1190 * Should not be called from cond_snapshot.update(), since it takes
1191 * the tr->max_lock lock, which the code calling
1192 * cond_snapshot.update() has already taken.
1193 *
1194 * Returns the cond_data associated with the trace array's snapshot.
1195 */
1196 void *tracing_cond_snapshot_data(struct trace_array *tr)
1197 {
1198 void *cond_data = NULL;
1199
1200 local_irq_disable();
1201 arch_spin_lock(&tr->max_lock);
1202
1203 if (tr->cond_snapshot)
1204 cond_data = tr->cond_snapshot->cond_data;
1205
1206 arch_spin_unlock(&tr->max_lock);
1207 local_irq_enable();
1208
1209 return cond_data;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1212
1213 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1214 struct array_buffer *size_buf, int cpu_id);
1215 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1216
1217 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1218 {
1219 int ret;
1220
1221 if (!tr->allocated_snapshot) {
1222
1223 /* allocate spare buffer */
1224 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1225 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1226 if (ret < 0)
1227 return ret;
1228
1229 tr->allocated_snapshot = true;
1230 }
1231
1232 return 0;
1233 }
1234
1235 static void free_snapshot(struct trace_array *tr)
1236 {
1237 /*
1238 * We don't free the ring buffer; instead, we resize it because
1239 * the max_tr ring buffer has some state (e.g. ring->clock) and
1240 * we want to preserve it.
1241 */
1242 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1243 set_buffer_entries(&tr->max_buffer, 1);
1244 tracing_reset_online_cpus(&tr->max_buffer);
1245 tr->allocated_snapshot = false;
1246 }
1247
1248 /**
1249 * tracing_alloc_snapshot - allocate snapshot buffer.
1250 *
1251 * This only allocates the snapshot buffer if it isn't already
1252 * allocated - it doesn't also take a snapshot.
1253 *
1254 * This is meant to be used in cases where the snapshot buffer needs
1255 * to be set up for events that can't sleep but need to be able to
1256 * trigger a snapshot.
1257 */
1258 int tracing_alloc_snapshot(void)
1259 {
1260 struct trace_array *tr = &global_trace;
1261 int ret;
1262
1263 ret = tracing_alloc_snapshot_instance(tr);
1264 WARN_ON(ret < 0);
1265
1266 return ret;
1267 }
1268 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1269
1270 /**
1271 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1272 *
1273 * This is similar to tracing_snapshot(), but it will allocate the
1274 * snapshot buffer if it isn't already allocated. Use this only
1275 * where it is safe to sleep, as the allocation may sleep.
1276 *
1277 * This causes a swap between the snapshot buffer and the current live
1278 * tracing buffer. You can use this to take snapshots of the live
1279 * trace when some condition is triggered, but continue to trace.
1280 */
1281 void tracing_snapshot_alloc(void)
1282 {
1283 int ret;
1284
1285 ret = tracing_alloc_snapshot();
1286 if (ret < 0)
1287 return;
1288
1289 tracing_snapshot();
1290 }
1291 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1292
1293 /**
1294 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1295 * @tr: The tracing instance
1296 * @cond_data: User data to associate with the snapshot
1297 * @update: Implementation of the cond_snapshot update function
1298 *
1299 * Check whether the conditional snapshot for the given instance has
1300 * already been enabled, or if the current tracer is already using a
1301 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1302 * save the cond_data and update function inside.
1303 *
1304 * Returns 0 if successful, error otherwise.
1305 */
1306 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1307 cond_update_fn_t update)
1308 {
1309 struct cond_snapshot *cond_snapshot;
1310 int ret = 0;
1311
1312 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1313 if (!cond_snapshot)
1314 return -ENOMEM;
1315
1316 cond_snapshot->cond_data = cond_data;
1317 cond_snapshot->update = update;
1318
1319 mutex_lock(&trace_types_lock);
1320
1321 ret = tracing_alloc_snapshot_instance(tr);
1322 if (ret)
1323 goto fail_unlock;
1324
1325 if (tr->current_trace->use_max_tr) {
1326 ret = -EBUSY;
1327 goto fail_unlock;
1328 }
1329
1330 /*
1331 * The cond_snapshot can only change to NULL without the
1332 * trace_types_lock. We don't care if we race with it going
1333 * to NULL, but we want to make sure that it's not set to
1334 * something other than NULL when we get here, which we can
1335 * do safely with only holding the trace_types_lock and not
1336 * having to take the max_lock.
1337 */
1338 if (tr->cond_snapshot) {
1339 ret = -EBUSY;
1340 goto fail_unlock;
1341 }
1342
1343 local_irq_disable();
1344 arch_spin_lock(&tr->max_lock);
1345 tr->cond_snapshot = cond_snapshot;
1346 arch_spin_unlock(&tr->max_lock);
1347 local_irq_enable();
1348
1349 mutex_unlock(&trace_types_lock);
1350
1351 return ret;
1352
1353 fail_unlock:
1354 mutex_unlock(&trace_types_lock);
1355 kfree(cond_snapshot);
1356 return ret;
1357 }
1358 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
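/*
 * Example (sketch): a hypothetical user of the conditional snapshot API;
 * "my_update" and "my_state" are illustrative names only.
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *state = cond_data;
 *
 *		return state->hit_threshold;	// true => take the snapshot
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_state, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_state);	// snapshots only if my_update() agrees
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */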
1359
1360 /**
1361 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1362 * @tr: The tracing instance
1363 *
1364 * Check whether the conditional snapshot for the given instance is
1365 * enabled; if so, free the cond_snapshot associated with it,
1366 * otherwise return -EINVAL.
1367 *
1368 * Returns 0 if successful, error otherwise.
1369 */
1370 int tracing_snapshot_cond_disable(struct trace_array *tr)
1371 {
1372 int ret = 0;
1373
1374 local_irq_disable();
1375 arch_spin_lock(&tr->max_lock);
1376
1377 if (!tr->cond_snapshot)
1378 ret = -EINVAL;
1379 else {
1380 kfree(tr->cond_snapshot);
1381 tr->cond_snapshot = NULL;
1382 }
1383
1384 arch_spin_unlock(&tr->max_lock);
1385 local_irq_enable();
1386
1387 return ret;
1388 }
1389 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1390 #else
1391 void tracing_snapshot(void)
1392 {
1393 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1394 }
1395 EXPORT_SYMBOL_GPL(tracing_snapshot);
1396 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1397 {
1398 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1399 }
1400 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1401 int tracing_alloc_snapshot(void)
1402 {
1403 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1404 return -ENODEV;
1405 }
1406 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1407 void tracing_snapshot_alloc(void)
1408 {
1409 /* Give warning */
1410 tracing_snapshot();
1411 }
1412 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1413 void *tracing_cond_snapshot_data(struct trace_array *tr)
1414 {
1415 return NULL;
1416 }
1417 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1418 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1419 {
1420 return -ENODEV;
1421 }
1422 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1423 int tracing_snapshot_cond_disable(struct trace_array *tr)
1424 {
1425 return false;
1426 }
1427 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1428 #endif /* CONFIG_TRACER_SNAPSHOT */
1429
1430 void tracer_tracing_off(struct trace_array *tr)
1431 {
1432 if (tr->array_buffer.buffer)
1433 ring_buffer_record_off(tr->array_buffer.buffer);
1434 /*
1435 * This flag is looked at when buffers haven't been allocated
1436 * yet, or by some tracers (like irqsoff), that just want to
1437 * know if the ring buffer has been disabled, but it can handle
1438 * races where it gets disabled but we still do a record.
1439 * As the check is in the fast path of the tracers, it is more
1440 * important to be fast than accurate.
1441 */
1442 tr->buffer_disabled = 1;
1443 /* Make the flag seen by readers */
1444 smp_wmb();
1445 }
1446
1447 /**
1448 * tracing_off - turn off tracing buffers
1449 *
1450 * This function stops the tracing buffers from recording data.
1451 * It does not disable any overhead the tracers themselves may
1452 * be causing. This function simply causes all recording to
1453 * the ring buffers to fail.
1454 */
1455 void tracing_off(void)
1456 {
1457 tracer_tracing_off(&global_trace);
1458 }
1459 EXPORT_SYMBOL_GPL(tracing_off);
1460
1461 void disable_trace_on_warning(void)
1462 {
1463 if (__disable_trace_on_warning) {
1464 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1465 "Disabling tracing due to warning\n");
1466 tracing_off();
1467 }
1468 }
1469
1470 /**
1471 * tracer_tracing_is_on - show real state of ring buffer enabled
1472 * @tr : the trace array to know if ring buffer is enabled
1473 *
1474 * Shows real state of the ring buffer if it is enabled or not.
1475 */
1476 bool tracer_tracing_is_on(struct trace_array *tr)
1477 {
1478 if (tr->array_buffer.buffer)
1479 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1480 return !tr->buffer_disabled;
1481 }
1482
1483 /**
1484 * tracing_is_on - show state of ring buffers enabled
1485 */
1486 int tracing_is_on(void)
1487 {
1488 return tracer_tracing_is_on(&global_trace);
1489 }
1490 EXPORT_SYMBOL_GPL(tracing_is_on);
1491
1492 static int __init set_buf_size(char *str)
1493 {
1494 unsigned long buf_size;
1495
1496 if (!str)
1497 return 0;
1498 buf_size = memparse(str, &str);
1499 /*
1500 * nr_entries can not be zero and the startup
1501 * tests require some buffer space. Therefore
1502 * ensure we have at least 4096 bytes of buffer.
1503 */
1504 trace_buf_size = max(4096UL, buf_size);
1505 return 1;
1506 }
1507 __setup("trace_buf_size=", set_buf_size);
1508
1509 static int __init set_tracing_thresh(char *str)
1510 {
1511 unsigned long threshold;
1512 int ret;
1513
1514 if (!str)
1515 return 0;
1516 ret = kstrtoul(str, 0, &threshold);
1517 if (ret < 0)
1518 return 0;
1519 tracing_thresh = threshold * 1000;
1520 return 1;
1521 }
1522 __setup("tracing_thresh=", set_tracing_thresh);
1523
1524 unsigned long nsecs_to_usecs(unsigned long nsecs)
1525 {
1526 return nsecs / 1000;
1527 }
1528
1529 /*
1530 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1531 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1532 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1533 * of strings in the order that the evals (enum) were defined.
1534 */
1535 #undef C
1536 #define C(a, b) b
1537
1538 /* These must match the bit positions in trace_iterator_flags */
1539 static const char *trace_options[] = {
1540 TRACE_FLAGS
1541 NULL
1542 };
1543
1544 static struct {
1545 u64 (*func)(void);
1546 const char *name;
1547 int in_ns; /* is this clock in nanoseconds? */
1548 } trace_clocks[] = {
1549 { trace_clock_local, "local", 1 },
1550 { trace_clock_global, "global", 1 },
1551 { trace_clock_counter, "counter", 0 },
1552 { trace_clock_jiffies, "uptime", 0 },
1553 { trace_clock, "perf", 1 },
1554 { ktime_get_mono_fast_ns, "mono", 1 },
1555 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1556 { ktime_get_boot_fast_ns, "boot", 1 },
1557 ARCH_TRACE_CLOCKS
1558 };
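/*
 * Example (illustrative): the names above are what the per-instance
 * "trace_clock" file accepts, so switching the trace clock is e.g.:
 *
 *	# echo global > /sys/kernel/tracing/trace_clock
 */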
1559
1560 bool trace_clock_in_ns(struct trace_array *tr)
1561 {
1562 if (trace_clocks[tr->clock_id].in_ns)
1563 return true;
1564
1565 return false;
1566 }
1567
1568 /*
1569 * trace_parser_get_init - gets the buffer for trace parser
1570 */
1571 int trace_parser_get_init(struct trace_parser *parser, int size)
1572 {
1573 memset(parser, 0, sizeof(*parser));
1574
1575 parser->buffer = kmalloc(size, GFP_KERNEL);
1576 if (!parser->buffer)
1577 return 1;
1578
1579 parser->size = size;
1580 return 0;
1581 }
1582
1583 /*
1584 * trace_parser_put - frees the buffer for trace parser
1585 */
1586 void trace_parser_put(struct trace_parser *parser)
1587 {
1588 kfree(parser->buffer);
1589 parser->buffer = NULL;
1590 }
1591
1592 /*
1593 * trace_get_user - reads the user input string separated by space
1594 * (matched by isspace(ch))
1595 *
1596 * For each string found the 'struct trace_parser' is updated,
1597 * and the function returns.
1598 *
1599 * Returns number of bytes read.
1600 *
1601 * See kernel/trace/trace.h for 'struct trace_parser' details.
1602 */
1603 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1604 size_t cnt, loff_t *ppos)
1605 {
1606 char ch;
1607 size_t read = 0;
1608 ssize_t ret;
1609
1610 if (!*ppos)
1611 trace_parser_clear(parser);
1612
1613 ret = get_user(ch, ubuf++);
1614 if (ret)
1615 goto out;
1616
1617 read++;
1618 cnt--;
1619
1620 /*
1621 * The parser is not finished with the last write,
1622 * continue reading the user input without skipping spaces.
1623 */
1624 if (!parser->cont) {
1625 /* skip white space */
1626 while (cnt && isspace(ch)) {
1627 ret = get_user(ch, ubuf++);
1628 if (ret)
1629 goto out;
1630 read++;
1631 cnt--;
1632 }
1633
1634 parser->idx = 0;
1635
1636 /* only spaces were written */
1637 if (isspace(ch) || !ch) {
1638 *ppos += read;
1639 ret = read;
1640 goto out;
1641 }
1642 }
1643
1644 /* read the non-space input */
1645 while (cnt && !isspace(ch) && ch) {
1646 if (parser->idx < parser->size - 1)
1647 parser->buffer[parser->idx++] = ch;
1648 else {
1649 ret = -EINVAL;
1650 goto out;
1651 }
1652 ret = get_user(ch, ubuf++);
1653 if (ret)
1654 goto out;
1655 read++;
1656 cnt--;
1657 }
1658
1659 /* We either got finished input or we have to wait for another call. */
1660 if (isspace(ch) || !ch) {
1661 parser->buffer[parser->idx] = 0;
1662 parser->cont = false;
1663 } else if (parser->idx < parser->size - 1) {
1664 parser->cont = true;
1665 parser->buffer[parser->idx++] = ch;
1666 /* Make sure the parsed string always terminates with '\0'. */
1667 parser->buffer[parser->idx] = 0;
1668 } else {
1669 ret = -EINVAL;
1670 goto out;
1671 }
1672
1673 *ppos += read;
1674 ret = read;
1675
1676 out:
1677 return ret;
1678 }
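/*
 * Example (sketch): a write handler typically loops over the user buffer
 * with a parser, much like trace_pid_write() above; error handling is
 * trimmed and the names are illustrative:
 *
 *	struct trace_parser parser;
 *
 *	if (trace_parser_get_init(&parser, MY_BUF_SIZE))	// hypothetical size
 *		return -ENOMEM;
 *	while (cnt > 0) {
 *		ret = trace_get_user(&parser, ubuf, cnt, ppos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		// act on parser.buffer, a NUL-terminated token
 *		trace_parser_clear(&parser);
 *		ubuf += ret;
 *		cnt -= ret;
 *	}
 *	trace_parser_put(&parser);
 */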
1679
1680 /* TODO add a seq_buf_to_buffer() */
1681 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1682 {
1683 int len;
1684
1685 if (trace_seq_used(s) <= s->seq.readpos)
1686 return -EBUSY;
1687
1688 len = trace_seq_used(s) - s->seq.readpos;
1689 if (cnt > len)
1690 cnt = len;
1691 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1692
1693 s->seq.readpos += cnt;
1694 return cnt;
1695 }
1696
1697 unsigned long __read_mostly tracing_thresh;
1698 static const struct file_operations tracing_max_lat_fops;
1699
1700 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1701 defined(CONFIG_FSNOTIFY)
1702
1703 static struct workqueue_struct *fsnotify_wq;
1704
1705 static void latency_fsnotify_workfn(struct work_struct *work)
1706 {
1707 struct trace_array *tr = container_of(work, struct trace_array,
1708 fsnotify_work);
1709 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1710 }
1711
1712 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1713 {
1714 struct trace_array *tr = container_of(iwork, struct trace_array,
1715 fsnotify_irqwork);
1716 queue_work(fsnotify_wq, &tr->fsnotify_work);
1717 }
1718
1719 static void trace_create_maxlat_file(struct trace_array *tr,
1720 struct dentry *d_tracer)
1721 {
1722 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1723 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1724 tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1725 d_tracer, &tr->max_latency,
1726 &tracing_max_lat_fops);
1727 }
1728
1729 __init static int latency_fsnotify_init(void)
1730 {
1731 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1732 WQ_UNBOUND | WQ_HIGHPRI, 0);
1733 if (!fsnotify_wq) {
1734 pr_err("Unable to allocate tr_max_lat_wq\n");
1735 return -ENOMEM;
1736 }
1737 return 0;
1738 }
1739
1740 late_initcall_sync(latency_fsnotify_init);
1741
1742 void latency_fsnotify(struct trace_array *tr)
1743 {
1744 if (!fsnotify_wq)
1745 return;
1746 /*
1747 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1748 * possible that we are called from __schedule() or do_idle(), which
1749 * could cause a deadlock.
1750 */
1751 irq_work_queue(&tr->fsnotify_irqwork);
1752 }
1753
1754 /*
1755 * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1756 * defined(CONFIG_FSNOTIFY)
1757 */
1758 #else
1759
1760 #define trace_create_maxlat_file(tr, d_tracer) \
1761 trace_create_file("tracing_max_latency", 0644, d_tracer, \
1762 &tr->max_latency, &tracing_max_lat_fops)
1763
1764 #endif
1765
1766 #ifdef CONFIG_TRACER_MAX_TRACE
1767 /*
1768 * Copy the new maximum trace into the separate maximum-trace
1769 * structure. (this way the maximum trace is permanently saved,
1770 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1771 */
1772 static void
1773 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1774 {
1775 struct array_buffer *trace_buf = &tr->array_buffer;
1776 struct array_buffer *max_buf = &tr->max_buffer;
1777 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1778 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1779
1780 max_buf->cpu = cpu;
1781 max_buf->time_start = data->preempt_timestamp;
1782
1783 max_data->saved_latency = tr->max_latency;
1784 max_data->critical_start = data->critical_start;
1785 max_data->critical_end = data->critical_end;
1786
1787 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1788 max_data->pid = tsk->pid;
1789 /*
1790 * If tsk == current, then use current_uid(), as that does not use
1791 * RCU. The irq tracer can be called out of RCU scope.
1792 */
1793 if (tsk == current)
1794 max_data->uid = current_uid();
1795 else
1796 max_data->uid = task_uid(tsk);
1797
1798 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1799 max_data->policy = tsk->policy;
1800 max_data->rt_priority = tsk->rt_priority;
1801
1802 /* record this task's comm */
1803 tracing_record_cmdline(tsk);
1804 latency_fsnotify(tr);
1805 }
1806
1807 /**
1808 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1809 * @tr: tracer
1810 * @tsk: the task with the latency
1811 * @cpu: The cpu that initiated the trace.
1812 * @cond_data: User data associated with a conditional snapshot
1813 *
1814 * Flip the buffers between the @tr and the max_tr and record information
1815 * about which task was the cause of this latency.
1816 */
1817 void
1818 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1819 void *cond_data)
1820 {
1821 if (tr->stop_count)
1822 return;
1823
1824 WARN_ON_ONCE(!irqs_disabled());
1825
1826 if (!tr->allocated_snapshot) {
1827 /* Only the nop tracer should hit this when disabling */
1828 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1829 return;
1830 }
1831
1832 arch_spin_lock(&tr->max_lock);
1833
1834 /* Inherit the recordable setting from array_buffer */
1835 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1836 ring_buffer_record_on(tr->max_buffer.buffer);
1837 else
1838 ring_buffer_record_off(tr->max_buffer.buffer);
1839
1840 #ifdef CONFIG_TRACER_SNAPSHOT
1841 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1842 goto out_unlock;
1843 #endif
1844 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1845
1846 __update_max_tr(tr, tsk, cpu);
1847
1848 out_unlock:
1849 arch_spin_unlock(&tr->max_lock);
1850 }
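/*
 * A rough sketch (not code from this file) of how a latency tracer
 * typically feeds this path once it measures a new worst case; "delta"
 * and "cpu" are hypothetical locals:
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, cpu, NULL);
 *	}
 */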
1851
1852 /**
1853 * update_max_tr_single - only copy one trace over, and reset the rest
1854 * @tr: tracer
1855 * @tsk: task with the latency
1856 * @cpu: the cpu of the buffer to copy.
1857 *
1858 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1859 */
1860 void
1861 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1862 {
1863 int ret;
1864
1865 if (tr->stop_count)
1866 return;
1867
1868 WARN_ON_ONCE(!irqs_disabled());
1869 if (!tr->allocated_snapshot) {
1870 /* Only the nop tracer should hit this when disabling */
1871 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1872 return;
1873 }
1874
1875 arch_spin_lock(&tr->max_lock);
1876
1877 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1878
1879 if (ret == -EBUSY) {
1880 /*
1881 * We failed to swap the buffer due to a commit taking
1882 * place on this CPU. We fail to record, but we reset
1883 * the max trace buffer (no one writes directly to it)
1884 * and flag that it failed.
1885 */
1886 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1887 "Failed to swap buffers due to commit in progress\n");
1888 }
1889
1890 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1891
1892 __update_max_tr(tr, tsk, cpu);
1893 arch_spin_unlock(&tr->max_lock);
1894 }
1895 #endif /* CONFIG_TRACER_MAX_TRACE */
1896
1897 static int wait_on_pipe(struct trace_iterator *iter, int full)
1898 {
1899 /* Iterators are static, they should be filled or empty */
1900 if (trace_buffer_iter(iter, iter->cpu_file))
1901 return 0;
1902
1903 return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1904 full);
1905 }
1906
1907 #ifdef CONFIG_FTRACE_STARTUP_TEST
1908 static bool selftests_can_run;
1909
1910 struct trace_selftests {
1911 struct list_head list;
1912 struct tracer *type;
1913 };
1914
1915 static LIST_HEAD(postponed_selftests);
1916
1917 static int save_selftest(struct tracer *type)
1918 {
1919 struct trace_selftests *selftest;
1920
1921 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1922 if (!selftest)
1923 return -ENOMEM;
1924
1925 selftest->type = type;
1926 list_add(&selftest->list, &postponed_selftests);
1927 return 0;
1928 }
1929
1930 static int run_tracer_selftest(struct tracer *type)
1931 {
1932 struct trace_array *tr = &global_trace;
1933 struct tracer *saved_tracer = tr->current_trace;
1934 int ret;
1935
1936 if (!type->selftest || tracing_selftest_disabled)
1937 return 0;
1938
1939 /*
1940 * If a tracer registers early in boot up (before scheduling is
1941 * initialized and such), then do not run its selftests yet.
1942 * Instead, run it a little later in the boot process.
1943 */
1944 if (!selftests_can_run)
1945 return save_selftest(type);
1946
1947 /*
1948 * Run a selftest on this tracer.
1949 * Here we reset the trace buffer, and set the current
1950 * tracer to be this tracer. The tracer can then run some
1951 * internal tracing to verify that everything is in order.
1952 * If we fail, we do not register this tracer.
1953 */
1954 tracing_reset_online_cpus(&tr->array_buffer);
1955
1956 tr->current_trace = type;
1957
1958 #ifdef CONFIG_TRACER_MAX_TRACE
1959 if (type->use_max_tr) {
1960 /* If we expanded the buffers, make sure the max is expanded too */
1961 if (ring_buffer_expanded)
1962 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1963 RING_BUFFER_ALL_CPUS);
1964 tr->allocated_snapshot = true;
1965 }
1966 #endif
1967
1968 /* the test is responsible for initializing and enabling */
1969 pr_info("Testing tracer %s: ", type->name);
1970 ret = type->selftest(type, tr);
1971 /* the test is responsible for resetting too */
1972 tr->current_trace = saved_tracer;
1973 if (ret) {
1974 printk(KERN_CONT "FAILED!\n");
1975 /* Add the warning after printing 'FAILED' */
1976 WARN_ON(1);
1977 return -1;
1978 }
1979 /* Only reset on passing, to avoid touching corrupted buffers */
1980 tracing_reset_online_cpus(&tr->array_buffer);
1981
1982 #ifdef CONFIG_TRACER_MAX_TRACE
1983 if (type->use_max_tr) {
1984 tr->allocated_snapshot = false;
1985
1986 /* Shrink the max buffer again */
1987 if (ring_buffer_expanded)
1988 ring_buffer_resize(tr->max_buffer.buffer, 1,
1989 RING_BUFFER_ALL_CPUS);
1990 }
1991 #endif
1992
1993 printk(KERN_CONT "PASSED\n");
1994 return 0;
1995 }
1996
1997 static __init int init_trace_selftests(void)
1998 {
1999 struct trace_selftests *p, *n;
2000 struct tracer *t, **last;
2001 int ret;
2002
2003 selftests_can_run = true;
2004
2005 mutex_lock(&trace_types_lock);
2006
2007 if (list_empty(&postponed_selftests))
2008 goto out;
2009
2010 pr_info("Running postponed tracer tests:\n");
2011
2012 tracing_selftest_running = true;
2013 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2014 /* This loop can take minutes when sanitizers are enabled, so
2015 * let's make sure we allow RCU processing.
2016 */
2017 cond_resched();
2018 ret = run_tracer_selftest(p->type);
2019 /* If the test fails, then warn and remove from available_tracers */
2020 if (ret < 0) {
2021 WARN(1, "tracer: %s failed selftest, disabling\n",
2022 p->type->name);
2023 last = &trace_types;
2024 for (t = trace_types; t; t = t->next) {
2025 if (t == p->type) {
2026 *last = t->next;
2027 break;
2028 }
2029 last = &t->next;
2030 }
2031 }
2032 list_del(&p->list);
2033 kfree(p);
2034 }
2035 tracing_selftest_running = false;
2036
2037 out:
2038 mutex_unlock(&trace_types_lock);
2039
2040 return 0;
2041 }
2042 core_initcall(init_trace_selftests);
2043 #else
2044 static inline int run_tracer_selftest(struct tracer *type)
2045 {
2046 return 0;
2047 }
2048 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2049
2050 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2051
2052 static void __init apply_trace_boot_options(void);
2053
2054 /**
2055 * register_tracer - register a tracer with the ftrace system.
2056 * @type: the plugin for the tracer
2057 *
2058 * Register a new plugin tracer.
2059 */
2060 int __init register_tracer(struct tracer *type)
2061 {
2062 struct tracer *t;
2063 int ret = 0;
2064
2065 if (!type->name) {
2066 pr_info("Tracer must have a name\n");
2067 return -1;
2068 }
2069
2070 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2071 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2072 return -1;
2073 }
2074
2075 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2076 pr_warn("Can not register tracer %s due to lockdown\n",
2077 type->name);
2078 return -EPERM;
2079 }
2080
2081 mutex_lock(&trace_types_lock);
2082
2083 tracing_selftest_running = true;
2084
2085 for (t = trace_types; t; t = t->next) {
2086 if (strcmp(type->name, t->name) == 0) {
2087 /* already found */
2088 pr_info("Tracer %s already registered\n",
2089 type->name);
2090 ret = -1;
2091 goto out;
2092 }
2093 }
2094
2095 if (!type->set_flag)
2096 type->set_flag = &dummy_set_flag;
2097 if (!type->flags) {
2098 /* allocate a dummy tracer_flags */
2099 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2100 if (!type->flags) {
2101 ret = -ENOMEM;
2102 goto out;
2103 }
2104 type->flags->val = 0;
2105 type->flags->opts = dummy_tracer_opt;
2106 } else
2107 if (!type->flags->opts)
2108 type->flags->opts = dummy_tracer_opt;
2109
2110 /* store the tracer for __set_tracer_option */
2111 type->flags->trace = type;
2112
2113 ret = run_tracer_selftest(type);
2114 if (ret < 0)
2115 goto out;
2116
2117 type->next = trace_types;
2118 trace_types = type;
2119 add_tracer_options(&global_trace, type);
2120
2121 out:
2122 tracing_selftest_running = false;
2123 mutex_unlock(&trace_types_lock);
2124
2125 if (ret || !default_bootup_tracer)
2126 goto out_unlock;
2127
2128 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2129 goto out_unlock;
2130
2131 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2132 /* Do we want this tracer to start on bootup? */
2133 tracing_set_tracer(&global_trace, type->name);
2134 default_bootup_tracer = NULL;
2135
2136 apply_trace_boot_options();
2137
2138 /* disable other selftests, since this will break it. */
2139 disable_tracing_selftest("running a tracer");
2140
2141 out_unlock:
2142 return ret;
2143 }
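/*
 * A minimal sketch of a caller: a built-in tracer plugin registers itself
 * at init time roughly like this (the my_* names are hypothetical; see
 * trace_functions.c and friends for real examples):
 *
 *	static int my_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void my_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int init_my_tracer(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(init_my_tracer);
 */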
2144
2145 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2146 {
2147 struct trace_buffer *buffer = buf->buffer;
2148
2149 if (!buffer)
2150 return;
2151
2152 ring_buffer_record_disable(buffer);
2153
2154 /* Make sure all commits have finished */
2155 synchronize_rcu();
2156 ring_buffer_reset_cpu(buffer, cpu);
2157
2158 ring_buffer_record_enable(buffer);
2159 }
2160
2161 void tracing_reset_online_cpus(struct array_buffer *buf)
2162 {
2163 struct trace_buffer *buffer = buf->buffer;
2164
2165 if (!buffer)
2166 return;
2167
2168 ring_buffer_record_disable(buffer);
2169
2170 /* Make sure all commits have finished */
2171 synchronize_rcu();
2172
2173 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2174
2175 ring_buffer_reset_online_cpus(buffer);
2176
2177 ring_buffer_record_enable(buffer);
2178 }
2179
2180 /* Must have trace_types_lock held */
2181 void tracing_reset_all_online_cpus(void)
2182 {
2183 struct trace_array *tr;
2184
2185 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2186 if (!tr->clear_trace)
2187 continue;
2188 tr->clear_trace = false;
2189 tracing_reset_online_cpus(&tr->array_buffer);
2190 #ifdef CONFIG_TRACER_MAX_TRACE
2191 tracing_reset_online_cpus(&tr->max_buffer);
2192 #endif
2193 }
2194 }
2195
2196 /*
2197 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2198 * is the tgid last observed corresponding to pid=i.
2199 */
2200 static int *tgid_map;
2201
2202 /* The maximum valid index into tgid_map. */
2203 static size_t tgid_map_max;
2204
2205 #define SAVED_CMDLINES_DEFAULT 128
2206 #define NO_CMDLINE_MAP UINT_MAX
2207 /*
2208 * Preemption must be disabled before acquiring trace_cmdline_lock.
2209 * The various trace_arrays' max_lock must be acquired in a context
2210 * where interrupt is disabled.
2211 */
2212 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2213 struct saved_cmdlines_buffer {
2214 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2215 unsigned *map_cmdline_to_pid;
2216 unsigned cmdline_num;
2217 int cmdline_idx;
2218 char *saved_cmdlines;
2219 };
2220 static struct saved_cmdlines_buffer *savedcmd;
2221
2222 static inline char *get_saved_cmdlines(int idx)
2223 {
2224 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2225 }
2226
2227 static inline void set_cmdline(int idx, const char *cmdline)
2228 {
2229 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2230 }
2231
2232 static int allocate_cmdlines_buffer(unsigned int val,
2233 struct saved_cmdlines_buffer *s)
2234 {
2235 s->map_cmdline_to_pid = kmalloc_array(val,
2236 sizeof(*s->map_cmdline_to_pid),
2237 GFP_KERNEL);
2238 if (!s->map_cmdline_to_pid)
2239 return -ENOMEM;
2240
2241 s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2242 if (!s->saved_cmdlines) {
2243 kfree(s->map_cmdline_to_pid);
2244 return -ENOMEM;
2245 }
2246
2247 s->cmdline_idx = 0;
2248 s->cmdline_num = val;
2249 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2250 sizeof(s->map_pid_to_cmdline));
2251 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2252 val * sizeof(*s->map_cmdline_to_pid));
2253
2254 return 0;
2255 }
2256
2257 static int trace_create_savedcmd(void)
2258 {
2259 int ret;
2260
2261 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2262 if (!savedcmd)
2263 return -ENOMEM;
2264
2265 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2266 if (ret < 0) {
2267 kfree(savedcmd);
2268 savedcmd = NULL;
2269 return -ENOMEM;
2270 }
2271
2272 return 0;
2273 }
2274
2275 int is_tracing_stopped(void)
2276 {
2277 return global_trace.stop_count;
2278 }
2279
2280 /**
2281 * tracing_start - quick start of the tracer
2282 *
2283 * If tracing is enabled but was stopped by tracing_stop,
2284 * this will start the tracer back up.
2285 */
2286 void tracing_start(void)
2287 {
2288 struct trace_buffer *buffer;
2289 unsigned long flags;
2290
2291 if (tracing_disabled)
2292 return;
2293
2294 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2295 if (--global_trace.stop_count) {
2296 if (global_trace.stop_count < 0) {
2297 /* Someone screwed up their debugging */
2298 WARN_ON_ONCE(1);
2299 global_trace.stop_count = 0;
2300 }
2301 goto out;
2302 }
2303
2304 /* Prevent the buffers from switching */
2305 arch_spin_lock(&global_trace.max_lock);
2306
2307 buffer = global_trace.array_buffer.buffer;
2308 if (buffer)
2309 ring_buffer_record_enable(buffer);
2310
2311 #ifdef CONFIG_TRACER_MAX_TRACE
2312 buffer = global_trace.max_buffer.buffer;
2313 if (buffer)
2314 ring_buffer_record_enable(buffer);
2315 #endif
2316
2317 arch_spin_unlock(&global_trace.max_lock);
2318
2319 out:
2320 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2321 }
2322
2323 static void tracing_start_tr(struct trace_array *tr)
2324 {
2325 struct trace_buffer *buffer;
2326 unsigned long flags;
2327
2328 if (tracing_disabled)
2329 return;
2330
2331 /* If global, we need to also start the max tracer */
2332 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2333 return tracing_start();
2334
2335 raw_spin_lock_irqsave(&tr->start_lock, flags);
2336
2337 if (--tr->stop_count) {
2338 if (tr->stop_count < 0) {
2339 /* Someone screwed up their debugging */
2340 WARN_ON_ONCE(1);
2341 tr->stop_count = 0;
2342 }
2343 goto out;
2344 }
2345
2346 buffer = tr->array_buffer.buffer;
2347 if (buffer)
2348 ring_buffer_record_enable(buffer);
2349
2350 out:
2351 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2352 }
2353
2354 /**
2355 * tracing_stop - quick stop of the tracer
2356 *
2357 * Light weight way to stop tracing. Use in conjunction with
2358 * tracing_start.
2359 */
2360 void tracing_stop(void)
2361 {
2362 struct trace_buffer *buffer;
2363 unsigned long flags;
2364
2365 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2366 if (global_trace.stop_count++)
2367 goto out;
2368
2369 /* Prevent the buffers from switching */
2370 arch_spin_lock(&global_trace.max_lock);
2371
2372 buffer = global_trace.array_buffer.buffer;
2373 if (buffer)
2374 ring_buffer_record_disable(buffer);
2375
2376 #ifdef CONFIG_TRACER_MAX_TRACE
2377 buffer = global_trace.max_buffer.buffer;
2378 if (buffer)
2379 ring_buffer_record_disable(buffer);
2380 #endif
2381
2382 arch_spin_unlock(&global_trace.max_lock);
2383
2384 out:
2385 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2386 }
2387
2388 static void tracing_stop_tr(struct trace_array *tr)
2389 {
2390 struct trace_buffer *buffer;
2391 unsigned long flags;
2392
2393 /* If global, we need to also stop the max tracer */
2394 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2395 return tracing_stop();
2396
2397 raw_spin_lock_irqsave(&tr->start_lock, flags);
2398 if (tr->stop_count++)
2399 goto out;
2400
2401 buffer = tr->array_buffer.buffer;
2402 if (buffer)
2403 ring_buffer_record_disable(buffer);
2404
2405 out:
2406 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2407 }
2408
2409 static int trace_save_cmdline(struct task_struct *tsk)
2410 {
2411 unsigned tpid, idx;
2412
2413 /* treat recording of idle task as a success */
2414 if (!tsk->pid)
2415 return 1;
2416
2417 tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2418
2419 /*
2420 * It's not the end of the world if we don't get
2421 * the lock, but we also don't want to spin
2422 * nor do we want to disable interrupts,
2423 * so if we miss here, then better luck next time.
2424 *
2425 * This is called within the scheduler and wakeup paths, so interrupts
2426 * had better be disabled and the run queue lock held.
2427 */
2428 lockdep_assert_preemption_disabled();
2429 if (!arch_spin_trylock(&trace_cmdline_lock))
2430 return 0;
2431
2432 idx = savedcmd->map_pid_to_cmdline[tpid];
2433 if (idx == NO_CMDLINE_MAP) {
2434 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2435
2436 savedcmd->map_pid_to_cmdline[tpid] = idx;
2437 savedcmd->cmdline_idx = idx;
2438 }
2439
2440 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2441 set_cmdline(idx, tsk->comm);
2442
2443 arch_spin_unlock(&trace_cmdline_lock);
2444
2445 return 1;
2446 }
2447
2448 static void __trace_find_cmdline(int pid, char comm[])
2449 {
2450 unsigned map;
2451 int tpid;
2452
2453 if (!pid) {
2454 strcpy(comm, "<idle>");
2455 return;
2456 }
2457
2458 if (WARN_ON_ONCE(pid < 0)) {
2459 strcpy(comm, "<XXX>");
2460 return;
2461 }
2462
2463 tpid = pid & (PID_MAX_DEFAULT - 1);
2464 map = savedcmd->map_pid_to_cmdline[tpid];
2465 if (map != NO_CMDLINE_MAP) {
2466 tpid = savedcmd->map_cmdline_to_pid[map];
2467 if (tpid == pid) {
2468 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2469 return;
2470 }
2471 }
2472 strcpy(comm, "<...>");
2473 }
2474
2475 void trace_find_cmdline(int pid, char comm[])
2476 {
2477 preempt_disable();
2478 arch_spin_lock(&trace_cmdline_lock);
2479
2480 __trace_find_cmdline(pid, comm);
2481
2482 arch_spin_unlock(&trace_cmdline_lock);
2483 preempt_enable();
2484 }
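/*
 * A sketch of typical use from the output side: resolve a recorded pid
 * back to a command name when printing an event ("entry" and "s" stand
 * in for the usual trace_entry and trace_seq of a print callback):
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%16s-%-7d", comm, entry->pid);
 */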
2485
2486 static int *trace_find_tgid_ptr(int pid)
2487 {
2488 /*
2489 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2490 * if we observe a non-NULL tgid_map then we also observe the correct
2491 * tgid_map_max.
2492 */
2493 int *map = smp_load_acquire(&tgid_map);
2494
2495 if (unlikely(!map || pid > tgid_map_max))
2496 return NULL;
2497
2498 return &map[pid];
2499 }
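/*
 * A sketch of the publishing side this pairs with (the real counterpart
 * lives in set_tracer_flag() when the record-tgid option is turned on):
 * the writer sets tgid_map_max first and only then publishes the pointer
 * with a release store.
 *
 *	tgid_map_max = pid_max;
 *	map = kvcalloc(tgid_map_max + 1, sizeof(*map), GFP_KERNEL);
 *	smp_store_release(&tgid_map, map);
 */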
2500
2501 int trace_find_tgid(int pid)
2502 {
2503 int *ptr = trace_find_tgid_ptr(pid);
2504
2505 return ptr ? *ptr : 0;
2506 }
2507
2508 static int trace_save_tgid(struct task_struct *tsk)
2509 {
2510 int *ptr;
2511
2512 /* treat recording of idle task as a success */
2513 if (!tsk->pid)
2514 return 1;
2515
2516 ptr = trace_find_tgid_ptr(tsk->pid);
2517 if (!ptr)
2518 return 0;
2519
2520 *ptr = tsk->tgid;
2521 return 1;
2522 }
2523
2524 static bool tracing_record_taskinfo_skip(int flags)
2525 {
2526 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2527 return true;
2528 if (!__this_cpu_read(trace_taskinfo_save))
2529 return true;
2530 return false;
2531 }
2532
2533 /**
2534 * tracing_record_taskinfo - record the task info of a task
2535 *
2536 * @task: task to record
2537 * @flags: TRACE_RECORD_CMDLINE for recording comm
2538 * TRACE_RECORD_TGID for recording tgid
2539 */
2540 void tracing_record_taskinfo(struct task_struct *task, int flags)
2541 {
2542 bool done;
2543
2544 if (tracing_record_taskinfo_skip(flags))
2545 return;
2546
2547 /*
2548 * Record as much task information as possible. If some fail, continue
2549 * to try to record the others.
2550 */
2551 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2552 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2553
2554 /* If recording any information failed, retry again soon. */
2555 if (!done)
2556 return;
2557
2558 __this_cpu_write(trace_taskinfo_save, false);
2559 }
2560
2561 /**
2562 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2563 *
2564 * @prev: previous task during sched_switch
2565 * @next: next task during sched_switch
2566 * @flags: TRACE_RECORD_CMDLINE for recording comm
2567 * TRACE_RECORD_TGID for recording tgid
2568 */
2569 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2570 struct task_struct *next, int flags)
2571 {
2572 bool done;
2573
2574 if (tracing_record_taskinfo_skip(flags))
2575 return;
2576
2577 /*
2578 * Record as much task information as possible. If some fail, continue
2579 * to try to record the others.
2580 */
2581 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2582 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2583 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2584 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2585
2586 /* If recording any information failed, retry again soon. */
2587 if (!done)
2588 return;
2589
2590 __this_cpu_write(trace_taskinfo_save, false);
2591 }
2592
2593 /* Helpers to record a specific task information */
2594 void tracing_record_cmdline(struct task_struct *task)
2595 {
2596 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2597 }
2598
2599 void tracing_record_tgid(struct task_struct *task)
2600 {
2601 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2602 }
2603
2604 /*
2605 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2606 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2607 * simplifies those functions and keeps them in sync.
2608 */
2609 enum print_line_t trace_handle_return(struct trace_seq *s)
2610 {
2611 return trace_seq_has_overflowed(s) ?
2612 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2613 }
2614 EXPORT_SYMBOL_GPL(trace_handle_return);
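/*
 * A sketch of the usual caller shape: an event's print callback writes
 * into the trace_seq and lets this helper translate overflow into the
 * right return value (trace_foo_print is a hypothetical name):
 *
 *	static enum print_line_t
 *	trace_foo_print(struct trace_iterator *iter, int flags,
 *			struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "foo fired\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */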
2615
2616 void
2617 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2618 unsigned long flags, int pc)
2619 {
2620 struct task_struct *tsk = current;
2621
2622 entry->preempt_count = pc & 0xff;
2623 entry->pid = (tsk) ? tsk->pid : 0;
2624 entry->type = type;
2625 entry->flags =
2626 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2627 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2628 #else
2629 TRACE_FLAG_IRQS_NOSUPPORT |
2630 #endif
2631 ((pc & NMI_MASK) ? TRACE_FLAG_NMI : 0) |
2632 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2633 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2634 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2635 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2636 }
2637 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2638
2639 struct ring_buffer_event *
2640 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2641 int type,
2642 unsigned long len,
2643 unsigned long flags, int pc)
2644 {
2645 return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2646 }
2647
2648 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2649 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2650 static int trace_buffered_event_ref;
2651
2652 /**
2653 * trace_buffered_event_enable - enable buffering events
2654 *
2655 * When events are being filtered, it is quicker to use a temporary
2656 * buffer to write the event data into if there's a likely chance
2657 * that it will not be committed. The discard of the ring buffer
2658 * is not as fast as committing, and is much slower than copying
2659 * a commit.
2660 *
2661 * When an event is to be filtered, allocate per cpu buffers to
2662 * write the event data into, and if the event is filtered and discarded
2663 * it is simply dropped; otherwise, the entire data is committed
2664 * in one shot.
2665 */
2666 void trace_buffered_event_enable(void)
2667 {
2668 struct ring_buffer_event *event;
2669 struct page *page;
2670 int cpu;
2671
2672 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2673
2674 if (trace_buffered_event_ref++)
2675 return;
2676
2677 for_each_tracing_cpu(cpu) {
2678 page = alloc_pages_node(cpu_to_node(cpu),
2679 GFP_KERNEL | __GFP_NORETRY, 0);
2680 if (!page)
2681 goto failed;
2682
2683 event = page_address(page);
2684 memset(event, 0, sizeof(*event));
2685
2686 per_cpu(trace_buffered_event, cpu) = event;
2687
2688 preempt_disable();
2689 if (cpu == smp_processor_id() &&
2690 __this_cpu_read(trace_buffered_event) !=
2691 per_cpu(trace_buffered_event, cpu))
2692 WARN_ON_ONCE(1);
2693 preempt_enable();
2694 }
2695
2696 return;
2697 failed:
2698 trace_buffered_event_disable();
2699 }
2700
2701 static void enable_trace_buffered_event(void *data)
2702 {
2703 /* Probably not needed, but do it anyway */
2704 smp_rmb();
2705 this_cpu_dec(trace_buffered_event_cnt);
2706 }
2707
2708 static void disable_trace_buffered_event(void *data)
2709 {
2710 this_cpu_inc(trace_buffered_event_cnt);
2711 }
2712
2713 /**
2714 * trace_buffered_event_disable - disable buffering events
2715 *
2716 * When a filter is removed, it is faster to not use the buffered
2717 * events, and to commit directly into the ring buffer. Free up
2718 * the temp buffers when there are no more users. This requires
2719 * special synchronization with current events.
2720 */
2721 void trace_buffered_event_disable(void)
2722 {
2723 int cpu;
2724
2725 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2726
2727 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2728 return;
2729
2730 if (--trace_buffered_event_ref)
2731 return;
2732
2733 preempt_disable();
2734 /* For each CPU, set the buffer as used. */
2735 smp_call_function_many(tracing_buffer_mask,
2736 disable_trace_buffered_event, NULL, 1);
2737 preempt_enable();
2738
2739 /* Wait for all current users to finish */
2740 synchronize_rcu();
2741
2742 for_each_tracing_cpu(cpu) {
2743 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2744 per_cpu(trace_buffered_event, cpu) = NULL;
2745 }
2746 /*
2747 * Make sure trace_buffered_event is NULL before clearing
2748 * trace_buffered_event_cnt.
2749 */
2750 smp_wmb();
2751
2752 preempt_disable();
2753 /* Do the work on each cpu */
2754 smp_call_function_many(tracing_buffer_mask,
2755 enable_trace_buffered_event, NULL, 1);
2756 preempt_enable();
2757 }
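/*
 * A sketch of the enable/disable pairing: both helpers are reference
 * counted and expect event_mutex to be held, roughly the way the event
 * filter code drives them:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();		(when the first filter is added)
 *	...
 *	trace_buffered_event_disable();		(when the last filter is removed)
 *	mutex_unlock(&event_mutex);
 */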
2758
2759 static struct trace_buffer *temp_buffer;
2760
2761 struct ring_buffer_event *
2762 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2763 struct trace_event_file *trace_file,
2764 int type, unsigned long len,
2765 unsigned long flags, int pc)
2766 {
2767 struct ring_buffer_event *entry;
2768 int val;
2769
2770 *current_rb = trace_file->tr->array_buffer.buffer;
2771
2772 if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2773 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2774 (entry = this_cpu_read(trace_buffered_event))) {
2775 /* Try to use the per cpu buffer first */
2776 val = this_cpu_inc_return(trace_buffered_event_cnt);
2777 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2778 trace_event_setup(entry, type, flags, pc);
2779 entry->array[0] = len;
2780 return entry;
2781 }
2782 this_cpu_dec(trace_buffered_event_cnt);
2783 }
2784
2785 entry = __trace_buffer_lock_reserve(*current_rb,
2786 type, len, flags, pc);
2787 /*
2788 * If tracing is off, but we have triggers enabled
2789 * we still need to look at the event data. Use the temp_buffer
2790 * to store the trace event for the trigger to use. It's recursion
2791 * safe and will not be recorded anywhere.
2792 */
2793 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2794 *current_rb = temp_buffer;
2795 entry = __trace_buffer_lock_reserve(*current_rb,
2796 type, len, flags, pc);
2797 }
2798 return entry;
2799 }
2800 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2801
2802 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2803 static DEFINE_MUTEX(tracepoint_printk_mutex);
2804
2805 static void output_printk(struct trace_event_buffer *fbuffer)
2806 {
2807 struct trace_event_call *event_call;
2808 struct trace_event_file *file;
2809 struct trace_event *event;
2810 unsigned long flags;
2811 struct trace_iterator *iter = tracepoint_print_iter;
2812
2813 /* We should never get here if iter is NULL */
2814 if (WARN_ON_ONCE(!iter))
2815 return;
2816
2817 event_call = fbuffer->trace_file->event_call;
2818 if (!event_call || !event_call->event.funcs ||
2819 !event_call->event.funcs->trace)
2820 return;
2821
2822 file = fbuffer->trace_file;
2823 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2824 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2825 !filter_match_preds(file->filter, fbuffer->entry)))
2826 return;
2827
2828 event = &fbuffer->trace_file->event_call->event;
2829
2830 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2831 trace_seq_init(&iter->seq);
2832 iter->ent = fbuffer->entry;
2833 event_call->event.funcs->trace(iter, 0, event);
2834 trace_seq_putc(&iter->seq, 0);
2835 printk("%s", iter->seq.buffer);
2836
2837 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2838 }
2839
2840 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2841 void *buffer, size_t *lenp,
2842 loff_t *ppos)
2843 {
2844 int save_tracepoint_printk;
2845 int ret;
2846
2847 mutex_lock(&tracepoint_printk_mutex);
2848 save_tracepoint_printk = tracepoint_printk;
2849
2850 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2851
2852 /*
2853 * This will force exiting early, as tracepoint_printk
2854 * is always zero when tracepoint_print_iter is not allocated
2855 */
2856 if (!tracepoint_print_iter)
2857 tracepoint_printk = 0;
2858
2859 if (save_tracepoint_printk == tracepoint_printk)
2860 goto out;
2861
2862 if (tracepoint_printk)
2863 static_key_enable(&tracepoint_printk_key.key);
2864 else
2865 static_key_disable(&tracepoint_printk_key.key);
2866
2867 out:
2868 mutex_unlock(&tracepoint_printk_mutex);
2869
2870 return ret;
2871 }
2872
2873 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2874 {
2875 if (static_key_false(&tracepoint_printk_key.key))
2876 output_printk(fbuffer);
2877
2878 if (static_branch_unlikely(&trace_event_exports_enabled))
2879 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2880 event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2881 fbuffer->event, fbuffer->entry,
2882 fbuffer->flags, fbuffer->pc, fbuffer->regs);
2883 }
2884 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2885
2886 /*
2887 * Skip 3:
2888 *
2889 * trace_buffer_unlock_commit_regs()
2890 * trace_event_buffer_commit()
2891 * trace_event_raw_event_xxx()
2892 */
2893 # define STACK_SKIP 3
2894
2895 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2896 struct trace_buffer *buffer,
2897 struct ring_buffer_event *event,
2898 unsigned long flags, int pc,
2899 struct pt_regs *regs)
2900 {
2901 __buffer_unlock_commit(buffer, event);
2902
2903 /*
2904 * If regs is not set, then skip the necessary functions.
2905 * Note, we can still get here via blktrace, wakeup tracer
2906 * and mmiotrace, but that's ok if they lose a function or
2907 * two. They are not that meaningful.
2908 */
2909 ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2910 ftrace_trace_userstack(tr, buffer, flags, pc);
2911 }
2912
2913 /*
2914 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2915 */
2916 void
2917 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2918 struct ring_buffer_event *event)
2919 {
2920 __buffer_unlock_commit(buffer, event);
2921 }
2922
2923 void
2924 trace_function(struct trace_array *tr,
2925 unsigned long ip, unsigned long parent_ip, unsigned long flags,
2926 int pc)
2927 {
2928 struct trace_event_call *call = &event_function;
2929 struct trace_buffer *buffer = tr->array_buffer.buffer;
2930 struct ring_buffer_event *event;
2931 struct ftrace_entry *entry;
2932
2933 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2934 flags, pc);
2935 if (!event)
2936 return;
2937 entry = ring_buffer_event_data(event);
2938 entry->ip = ip;
2939 entry->parent_ip = parent_ip;
2940
2941 if (!call_filter_check_discard(call, entry, buffer, event)) {
2942 if (static_branch_unlikely(&trace_function_exports_enabled))
2943 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2944 __buffer_unlock_commit(buffer, event);
2945 }
2946 }
2947
2948 #ifdef CONFIG_STACKTRACE
2949
2950 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2951 #define FTRACE_KSTACK_NESTING 4
2952
2953 #define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2954
2955 struct ftrace_stack {
2956 unsigned long calls[FTRACE_KSTACK_ENTRIES];
2957 };
2958
2959
2960 struct ftrace_stacks {
2961 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
2962 };
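/*
 * With FTRACE_KSTACK_NESTING == 4 and a 4K PAGE_SIZE, the arithmetic above
 * gives FTRACE_KSTACK_ENTRIES = 4096 / 4 = 1024 saved return addresses per
 * nesting level (task, softirq, hardirq, NMI) on each CPU.
 */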
2963
2964 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2965 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2966
2967 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2968 unsigned long flags,
2969 int skip, int pc, struct pt_regs *regs)
2970 {
2971 struct trace_event_call *call = &event_kernel_stack;
2972 struct ring_buffer_event *event;
2973 unsigned int size, nr_entries;
2974 struct ftrace_stack *fstack;
2975 struct stack_entry *entry;
2976 int stackidx;
2977
2978 /*
2979 * Add one, for this function and the call to stack_trace_save()
2980 * If regs is set, then these functions will not be in the way.
2981 */
2982 #ifndef CONFIG_UNWINDER_ORC
2983 if (!regs)
2984 skip++;
2985 #endif
2986
2987 preempt_disable_notrace();
2988
2989 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2990
2991 /* This should never happen. If it does, yell once and skip */
2992 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2993 goto out;
2994
2995 /*
2996 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2997 * interrupt will either see the value pre increment or post
2998 * increment. If the interrupt happens pre increment it will have
2999 * restored the counter when it returns. We just need a barrier to
3000 * keep gcc from moving things around.
3001 */
3002 barrier();
3003
3004 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3005 size = ARRAY_SIZE(fstack->calls);
3006
3007 if (regs) {
3008 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3009 size, skip);
3010 } else {
3011 nr_entries = stack_trace_save(fstack->calls, size, skip);
3012 }
3013
3014 size = nr_entries * sizeof(unsigned long);
3015 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3016 (sizeof(*entry) - sizeof(entry->caller)) + size,
3017 flags, pc);
3018 if (!event)
3019 goto out;
3020 entry = ring_buffer_event_data(event);
3021
3022 memcpy(&entry->caller, fstack->calls, size);
3023 entry->size = nr_entries;
3024
3025 if (!call_filter_check_discard(call, entry, buffer, event))
3026 __buffer_unlock_commit(buffer, event);
3027
3028 out:
3029 /* Again, don't let gcc optimize things here */
3030 barrier();
3031 __this_cpu_dec(ftrace_stack_reserve);
3032 preempt_enable_notrace();
3033
3034 }
3035
3036 static inline void ftrace_trace_stack(struct trace_array *tr,
3037 struct trace_buffer *buffer,
3038 unsigned long flags,
3039 int skip, int pc, struct pt_regs *regs)
3040 {
3041 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3042 return;
3043
3044 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
3045 }
3046
3047 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
3048 int pc)
3049 {
3050 struct trace_buffer *buffer = tr->array_buffer.buffer;
3051
3052 if (rcu_is_watching()) {
3053 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3054 return;
3055 }
3056
3057 /*
3058 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3059 * but if the above rcu_is_watching() failed, then the NMI
3060 * triggered someplace critical, and rcu_irq_enter() should
3061 * not be called from NMI.
3062 */
3063 if (unlikely(in_nmi()))
3064 return;
3065
3066 rcu_irq_enter_irqson();
3067 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3068 rcu_irq_exit_irqson();
3069 }
3070
3071 /**
3072 * trace_dump_stack - record a stack back trace in the trace buffer
3073 * @skip: Number of functions to skip (helper handlers)
3074 */
3075 void trace_dump_stack(int skip)
3076 {
3077 unsigned long flags;
3078
3079 if (tracing_disabled || tracing_selftest_running)
3080 return;
3081
3082 local_save_flags(flags);
3083
3084 #ifndef CONFIG_UNWINDER_ORC
3085 /* Skip 1 to skip this function. */
3086 skip++;
3087 #endif
3088 __ftrace_trace_stack(global_trace.array_buffer.buffer,
3089 flags, skip, preempt_count(), NULL);
3090 }
3091 EXPORT_SYMBOL_GPL(trace_dump_stack);
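/*
 * A sketch of a typical call site: from a driver or a debugging patch,
 * record the current call chain into the trace buffer instead of the
 * console:
 *
 *	trace_dump_stack(0);
 */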
3092
3093 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3094 static DEFINE_PER_CPU(int, user_stack_count);
3095
3096 static void
3097 ftrace_trace_userstack(struct trace_array *tr,
3098 struct trace_buffer *buffer, unsigned long flags, int pc)
3099 {
3100 struct trace_event_call *call = &event_user_stack;
3101 struct ring_buffer_event *event;
3102 struct userstack_entry *entry;
3103
3104 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3105 return;
3106
3107 /*
3108 * NMIs cannot handle page faults, even with fixups.
3109 * The save user stack can (and often does) fault.
3110 */
3111 if (unlikely(in_nmi()))
3112 return;
3113
3114 /*
3115 * prevent recursion, since the user stack tracing may
3116 * trigger other kernel events.
3117 */
3118 preempt_disable();
3119 if (__this_cpu_read(user_stack_count))
3120 goto out;
3121
3122 __this_cpu_inc(user_stack_count);
3123
3124 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3125 sizeof(*entry), flags, pc);
3126 if (!event)
3127 goto out_drop_count;
3128 entry = ring_buffer_event_data(event);
3129
3130 entry->tgid = current->tgid;
3131 memset(&entry->caller, 0, sizeof(entry->caller));
3132
3133 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3134 if (!call_filter_check_discard(call, entry, buffer, event))
3135 __buffer_unlock_commit(buffer, event);
3136
3137 out_drop_count:
3138 __this_cpu_dec(user_stack_count);
3139 out:
3140 preempt_enable();
3141 }
3142 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3143 static void ftrace_trace_userstack(struct trace_array *tr,
3144 struct trace_buffer *buffer,
3145 unsigned long flags, int pc)
3146 {
3147 }
3148 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3149
3150 #endif /* CONFIG_STACKTRACE */
3151
3152 /* created for use with alloc_percpu */
3153 struct trace_buffer_struct {
3154 int nesting;
3155 char buffer[4][TRACE_BUF_SIZE];
3156 };
3157
3158 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3159
3160 /*
3161 * This allows for lockless recording. If we're nested too deeply, then
3162 * this returns NULL.
3163 */
3164 static char *get_trace_buf(void)
3165 {
3166 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3167
3168 if (!trace_percpu_buffer || buffer->nesting >= 4)
3169 return NULL;
3170
3171 buffer->nesting++;
3172
3173 /* Interrupts must see nesting incremented before we use the buffer */
3174 barrier();
3175 return &buffer->buffer[buffer->nesting - 1][0];
3176 }
3177
3178 static void put_trace_buf(void)
3179 {
3180 /* Don't let the decrement of nesting leak before this */
3181 barrier();
3182 this_cpu_dec(trace_percpu_buffer->nesting);
3183 }
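/*
 * A sketch of how the pair above is used (trace_vbprintk() below follows
 * exactly this shape): take a buffer, bail out if nesting is exhausted,
 * and always drop the reference on the way out.
 *
 *	char *tbuffer = get_trace_buf();
 *
 *	if (!tbuffer)
 *		goto out_nobuffer;
 *	...
 *	put_trace_buf();
 */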
3184
3185 static int alloc_percpu_trace_buffer(void)
3186 {
3187 struct trace_buffer_struct __percpu *buffers;
3188
3189 if (trace_percpu_buffer)
3190 return 0;
3191
3192 buffers = alloc_percpu(struct trace_buffer_struct);
3193 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3194 return -ENOMEM;
3195
3196 trace_percpu_buffer = buffers;
3197 return 0;
3198 }
3199
3200 static int buffers_allocated;
3201
3202 void trace_printk_init_buffers(void)
3203 {
3204 if (buffers_allocated)
3205 return;
3206
3207 if (alloc_percpu_trace_buffer())
3208 return;
3209
3210 /* trace_printk() is for debug use only. Don't use it in production. */
3211
3212 pr_warn("\n");
3213 pr_warn("**********************************************************\n");
3214 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3215 pr_warn("** **\n");
3216 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3217 pr_warn("** **\n");
3218 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3219 pr_warn("** unsafe for production use. **\n");
3220 pr_warn("** **\n");
3221 pr_warn("** If you see this message and you are not debugging **\n");
3222 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3223 pr_warn("** **\n");
3224 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3225 pr_warn("**********************************************************\n");
3226
3227 /* Expand the buffers to set size */
3228 tracing_update_buffers();
3229
3230 buffers_allocated = 1;
3231
3232 /*
3233 * trace_printk_init_buffers() can be called by modules.
3234 * If that happens, then we need to start cmdline recording
3235 * directly here. If the global_trace.buffer is already
3236 * allocated here, then this was called by module code.
3237 */
3238 if (global_trace.array_buffer.buffer)
3239 tracing_start_cmdline_record();
3240 }
3241 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3242
3243 void trace_printk_start_comm(void)
3244 {
3245 /* Start tracing comms if trace printk is set */
3246 if (!buffers_allocated)
3247 return;
3248 tracing_start_cmdline_record();
3249 }
3250
3251 static void trace_printk_start_stop_comm(int enabled)
3252 {
3253 if (!buffers_allocated)
3254 return;
3255
3256 if (enabled)
3257 tracing_start_cmdline_record();
3258 else
3259 tracing_stop_cmdline_record();
3260 }
3261
3262 /**
3263 * trace_vbprintk - write binary msg to tracing buffer
3264 * @ip: The address of the caller
3265 * @fmt: The string format to write to the buffer
3266 * @args: Arguments for @fmt
3267 */
3268 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3269 {
3270 struct trace_event_call *call = &event_bprint;
3271 struct ring_buffer_event *event;
3272 struct trace_buffer *buffer;
3273 struct trace_array *tr = &global_trace;
3274 struct bprint_entry *entry;
3275 unsigned long flags;
3276 char *tbuffer;
3277 int len = 0, size, pc;
3278
3279 if (unlikely(tracing_selftest_running || tracing_disabled))
3280 return 0;
3281
3282 /* Don't pollute graph traces with trace_vprintk internals */
3283 pause_graph_tracing();
3284
3285 pc = preempt_count();
3286 preempt_disable_notrace();
3287
3288 tbuffer = get_trace_buf();
3289 if (!tbuffer) {
3290 len = 0;
3291 goto out_nobuffer;
3292 }
3293
3294 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3295
3296 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3297 goto out_put;
3298
3299 local_save_flags(flags);
3300 size = sizeof(*entry) + sizeof(u32) * len;
3301 buffer = tr->array_buffer.buffer;
3302 ring_buffer_nest_start(buffer);
3303 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3304 flags, pc);
3305 if (!event)
3306 goto out;
3307 entry = ring_buffer_event_data(event);
3308 entry->ip = ip;
3309 entry->fmt = fmt;
3310
3311 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3312 if (!call_filter_check_discard(call, entry, buffer, event)) {
3313 __buffer_unlock_commit(buffer, event);
3314 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3315 }
3316
3317 out:
3318 ring_buffer_nest_end(buffer);
3319 out_put:
3320 put_trace_buf();
3321
3322 out_nobuffer:
3323 preempt_enable_notrace();
3324 unpause_graph_tracing();
3325
3326 return len;
3327 }
3328 EXPORT_SYMBOL_GPL(trace_vbprintk);
3329
3330 __printf(3, 0)
3331 static int
3332 __trace_array_vprintk(struct trace_buffer *buffer,
3333 unsigned long ip, const char *fmt, va_list args)
3334 {
3335 struct trace_event_call *call = &event_print;
3336 struct ring_buffer_event *event;
3337 int len = 0, size, pc;
3338 struct print_entry *entry;
3339 unsigned long flags;
3340 char *tbuffer;
3341
3342 if (tracing_disabled || tracing_selftest_running)
3343 return 0;
3344
3345 /* Don't pollute graph traces with trace_vprintk internals */
3346 pause_graph_tracing();
3347
3348 pc = preempt_count();
3349 preempt_disable_notrace();
3350
3351
3352 tbuffer = get_trace_buf();
3353 if (!tbuffer) {
3354 len = 0;
3355 goto out_nobuffer;
3356 }
3357
3358 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3359
3360 local_save_flags(flags);
3361 size = sizeof(*entry) + len + 1;
3362 ring_buffer_nest_start(buffer);
3363 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3364 flags, pc);
3365 if (!event)
3366 goto out;
3367 entry = ring_buffer_event_data(event);
3368 entry->ip = ip;
3369
3370 memcpy(&entry->buf, tbuffer, len + 1);
3371 if (!call_filter_check_discard(call, entry, buffer, event)) {
3372 __buffer_unlock_commit(buffer, event);
3373 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3374 }
3375
3376 out:
3377 ring_buffer_nest_end(buffer);
3378 put_trace_buf();
3379
3380 out_nobuffer:
3381 preempt_enable_notrace();
3382 unpause_graph_tracing();
3383
3384 return len;
3385 }
3386
3387 __printf(3, 0)
3388 int trace_array_vprintk(struct trace_array *tr,
3389 unsigned long ip, const char *fmt, va_list args)
3390 {
3391 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3392 }
3393
3394 /**
3395 * trace_array_printk - Print a message to a specific instance
3396 * @tr: The instance trace_array descriptor
3397 * @ip: The instruction pointer that this is called from.
3398 * @fmt: The format to print (printf format)
3399 *
3400 * If a subsystem sets up its own instance, they have the right to
3401 * printk strings into their tracing instance buffer using this
3402 * function. Note, this function will not write into the top level
3403 * buffer (use trace_printk() for that), as writing into the top level
3404 * buffer should only have events that can be individually disabled.
3405 * trace_printk() is only used for debugging a kernel, and should never
3406 * be incorporated into normal use.
3407 *
3408 * trace_array_printk() can be used, as it will not add noise to the
3409 * top level tracing buffer.
3410 *
3411 * Note, trace_array_init_printk() must be called on @tr before this
3412 * can be used.
3413 */
3414 __printf(3, 0)
3415 int trace_array_printk(struct trace_array *tr,
3416 unsigned long ip, const char *fmt, ...)
3417 {
3418 int ret;
3419 va_list ap;
3420
3421 if (!tr)
3422 return -ENOENT;
3423
3424 /* This is only allowed for created instances */
3425 if (tr == &global_trace)
3426 return 0;
3427
3428 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3429 return 0;
3430
3431 va_start(ap, fmt);
3432 ret = trace_array_vprintk(tr, ip, fmt, ap);
3433 va_end(ap);
3434 return ret;
3435 }
3436 EXPORT_SYMBOL_GPL(trace_array_printk);
3437
3438 /**
3439 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3440 * @tr: The trace array to initialize the buffers for
3441 *
3442 * As trace_array_printk() only writes into instances, they are OK to
3443 * have in the kernel (unlike trace_printk()). This needs to be called
3444 * before trace_array_printk() can be used on a trace_array.
3445 */
3446 int trace_array_init_printk(struct trace_array *tr)
3447 {
3448 if (!tr)
3449 return -ENOENT;
3450
3451 /* This is only allowed for created instances */
3452 if (tr == &global_trace)
3453 return -EINVAL;
3454
3455 return alloc_percpu_trace_buffer();
3456 }
3457 EXPORT_SYMBOL_GPL(trace_array_init_printk);
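/*
 * A sketch putting the two exported helpers together; the instance name
 * and the status variable are made up for illustration:
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_subsys");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "status %d\n", status);
 */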
3458
3459 __printf(3, 4)
3460 int trace_array_printk_buf(struct trace_buffer *buffer,
3461 unsigned long ip, const char *fmt, ...)
3462 {
3463 int ret;
3464 va_list ap;
3465
3466 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3467 return 0;
3468
3469 va_start(ap, fmt);
3470 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3471 va_end(ap);
3472 return ret;
3473 }
3474
3475 __printf(2, 0)
3476 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3477 {
3478 return trace_array_vprintk(&global_trace, ip, fmt, args);
3479 }
3480 EXPORT_SYMBOL_GPL(trace_vprintk);
3481
3482 static void trace_iterator_increment(struct trace_iterator *iter)
3483 {
3484 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3485
3486 iter->idx++;
3487 if (buf_iter)
3488 ring_buffer_iter_advance(buf_iter);
3489 }
3490
3491 static struct trace_entry *
3492 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3493 unsigned long *lost_events)
3494 {
3495 struct ring_buffer_event *event;
3496 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3497
3498 if (buf_iter) {
3499 event = ring_buffer_iter_peek(buf_iter, ts);
3500 if (lost_events)
3501 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3502 (unsigned long)-1 : 0;
3503 } else {
3504 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3505 lost_events);
3506 }
3507
3508 if (event) {
3509 iter->ent_size = ring_buffer_event_length(event);
3510 return ring_buffer_event_data(event);
3511 }
3512 iter->ent_size = 0;
3513 return NULL;
3514 }
3515
3516 static struct trace_entry *
3517 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3518 unsigned long *missing_events, u64 *ent_ts)
3519 {
3520 struct trace_buffer *buffer = iter->array_buffer->buffer;
3521 struct trace_entry *ent, *next = NULL;
3522 unsigned long lost_events = 0, next_lost = 0;
3523 int cpu_file = iter->cpu_file;
3524 u64 next_ts = 0, ts;
3525 int next_cpu = -1;
3526 int next_size = 0;
3527 int cpu;
3528
3529 /*
3530 * If we are in a per_cpu trace file, don't bother iterating over
3531 * all CPUs; just peek directly at that one.
3532 */
3533 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3534 if (ring_buffer_empty_cpu(buffer, cpu_file))
3535 return NULL;
3536 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3537 if (ent_cpu)
3538 *ent_cpu = cpu_file;
3539
3540 return ent;
3541 }
3542
3543 for_each_tracing_cpu(cpu) {
3544
3545 if (ring_buffer_empty_cpu(buffer, cpu))
3546 continue;
3547
3548 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3549
3550 /*
3551 * Pick the entry with the smallest timestamp:
3552 */
3553 if (ent && (!next || ts < next_ts)) {
3554 next = ent;
3555 next_cpu = cpu;
3556 next_ts = ts;
3557 next_lost = lost_events;
3558 next_size = iter->ent_size;
3559 }
3560 }
3561
3562 iter->ent_size = next_size;
3563
3564 if (ent_cpu)
3565 *ent_cpu = next_cpu;
3566
3567 if (ent_ts)
3568 *ent_ts = next_ts;
3569
3570 if (missing_events)
3571 *missing_events = next_lost;
3572
3573 return next;
3574 }
3575
3576 #define STATIC_TEMP_BUF_SIZE 128
3577 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3578
3579 /* Find the next real entry, without updating the iterator itself */
3580 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3581 int *ent_cpu, u64 *ent_ts)
3582 {
3583 /* __find_next_entry will reset ent_size */
3584 int ent_size = iter->ent_size;
3585 struct trace_entry *entry;
3586
3587 /*
3588 * If called from ftrace_dump(), then the iter->temp buffer
3589 * will be the static_temp_buf and not created from kmalloc.
3590 * If the entry size is greater than the buffer, we can
3591 * not save it. Just return NULL in that case. This is only
3592 * used to add markers when two consecutive events' time
3593 * stamps have a large delta. See trace_print_lat_context()
3594 */
3595 if (iter->temp == static_temp_buf &&
3596 STATIC_TEMP_BUF_SIZE < ent_size)
3597 return NULL;
3598
3599 /*
3600 * The __find_next_entry() may call peek_next_entry(), which may
3601 * call ring_buffer_peek() that may make the contents of iter->ent
3602 * undefined. Need to copy iter->ent now.
3603 */
3604 if (iter->ent && iter->ent != iter->temp) {
3605 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3606 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3607 void *temp;
3608 temp = kmalloc(iter->ent_size, GFP_KERNEL);
3609 if (!temp)
3610 return NULL;
3611 kfree(iter->temp);
3612 iter->temp = temp;
3613 iter->temp_size = iter->ent_size;
3614 }
3615 memcpy(iter->temp, iter->ent, iter->ent_size);
3616 iter->ent = iter->temp;
3617 }
3618 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3619 /* Put back the original ent_size */
3620 iter->ent_size = ent_size;
3621
3622 return entry;
3623 }
3624
3625 /* Find the next real entry, and increment the iterator to the next entry */
3626 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3627 {
3628 iter->ent = __find_next_entry(iter, &iter->cpu,
3629 &iter->lost_events, &iter->ts);
3630
3631 if (iter->ent)
3632 trace_iterator_increment(iter);
3633
3634 return iter->ent ? iter : NULL;
3635 }
3636
3637 static void trace_consume(struct trace_iterator *iter)
3638 {
3639 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3640 &iter->lost_events);
3641 }
3642
3643 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3644 {
3645 struct trace_iterator *iter = m->private;
3646 int i = (int)*pos;
3647 void *ent;
3648
3649 WARN_ON_ONCE(iter->leftover);
3650
3651 (*pos)++;
3652
3653 /* can't go backwards */
3654 if (iter->idx > i)
3655 return NULL;
3656
3657 if (iter->idx < 0)
3658 ent = trace_find_next_entry_inc(iter);
3659 else
3660 ent = iter;
3661
3662 while (ent && iter->idx < i)
3663 ent = trace_find_next_entry_inc(iter);
3664
3665 iter->pos = *pos;
3666
3667 return ent;
3668 }
3669
3670 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3671 {
3672 struct ring_buffer_iter *buf_iter;
3673 unsigned long entries = 0;
3674 u64 ts;
3675
3676 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3677
3678 buf_iter = trace_buffer_iter(iter, cpu);
3679 if (!buf_iter)
3680 return;
3681
3682 ring_buffer_iter_reset(buf_iter);
3683
3684 /*
3685 * With the max latency tracers, we could have the case that a
3686 * reset never took place on a cpu. This is evident from the
3687 * timestamp being before the start of the buffer.
3688 */
3689 while (ring_buffer_iter_peek(buf_iter, &ts)) {
3690 if (ts >= iter->array_buffer->time_start)
3691 break;
3692 entries++;
3693 ring_buffer_iter_advance(buf_iter);
3694 }
3695
3696 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3697 }
3698
3699 /*
3700 * The current tracer is copied to avoid global locking
3701 * all around.
3702 */
3703 static void *s_start(struct seq_file *m, loff_t *pos)
3704 {
3705 struct trace_iterator *iter = m->private;
3706 struct trace_array *tr = iter->tr;
3707 int cpu_file = iter->cpu_file;
3708 void *p = NULL;
3709 loff_t l = 0;
3710 int cpu;
3711
3712 /*
3713 * copy the tracer to avoid using a global lock all around.
3714 * iter->trace is a copy of current_trace, the pointer to the
3715 * name may be used instead of a strcmp(), as iter->trace->name
3716 * will point to the same string as current_trace->name.
3717 */
3718 mutex_lock(&trace_types_lock);
3719 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3720 *iter->trace = *tr->current_trace;
3721 mutex_unlock(&trace_types_lock);
3722
3723 #ifdef CONFIG_TRACER_MAX_TRACE
3724 if (iter->snapshot && iter->trace->use_max_tr)
3725 return ERR_PTR(-EBUSY);
3726 #endif
3727
3728 if (*pos != iter->pos) {
3729 iter->ent = NULL;
3730 iter->cpu = 0;
3731 iter->idx = -1;
3732
3733 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3734 for_each_tracing_cpu(cpu)
3735 tracing_iter_reset(iter, cpu);
3736 } else
3737 tracing_iter_reset(iter, cpu_file);
3738
3739 iter->leftover = 0;
3740 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3741 ;
3742
3743 } else {
3744 /*
3745 * If we overflowed the seq_file before, then we want
3746 * to just reuse the trace_seq buffer again.
3747 */
3748 if (iter->leftover)
3749 p = iter;
3750 else {
3751 l = *pos - 1;
3752 p = s_next(m, p, &l);
3753 }
3754 }
3755
3756 trace_event_read_lock();
3757 trace_access_lock(cpu_file);
3758 return p;
3759 }
3760
3761 static void s_stop(struct seq_file *m, void *p)
3762 {
3763 struct trace_iterator *iter = m->private;
3764
3765 #ifdef CONFIG_TRACER_MAX_TRACE
3766 if (iter->snapshot && iter->trace->use_max_tr)
3767 return;
3768 #endif
3769
3770 trace_access_unlock(iter->cpu_file);
3771 trace_event_read_unlock();
3772 }
3773
3774 static void
3775 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3776 unsigned long *entries, int cpu)
3777 {
3778 unsigned long count;
3779
3780 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3781 /*
3782 * If this buffer has skipped entries, then we hold all
3783 * entries for the trace and we need to ignore the
3784 * ones before the time stamp.
3785 */
3786 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3787 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3788 /* total is the same as the entries */
3789 *total = count;
3790 } else
3791 *total = count +
3792 ring_buffer_overrun_cpu(buf->buffer, cpu);
3793 *entries = count;
3794 }
3795
3796 static void
3797 get_total_entries(struct array_buffer *buf,
3798 unsigned long *total, unsigned long *entries)
3799 {
3800 unsigned long t, e;
3801 int cpu;
3802
3803 *total = 0;
3804 *entries = 0;
3805
3806 for_each_tracing_cpu(cpu) {
3807 get_total_entries_cpu(buf, &t, &e, cpu);
3808 *total += t;
3809 *entries += e;
3810 }
3811 }
3812
3813 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3814 {
3815 unsigned long total, entries;
3816
3817 if (!tr)
3818 tr = &global_trace;
3819
3820 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3821
3822 return entries;
3823 }
3824
3825 unsigned long trace_total_entries(struct trace_array *tr)
3826 {
3827 unsigned long total, entries;
3828
3829 if (!tr)
3830 tr = &global_trace;
3831
3832 get_total_entries(&tr->array_buffer, &total, &entries);
3833
3834 return entries;
3835 }
3836
3837 static void print_lat_help_header(struct seq_file *m)
3838 {
3839 seq_puts(m, "# _------=> CPU# \n"
3840 "# / _-----=> irqs-off \n"
3841 "# | / _----=> need-resched \n"
3842 "# || / _---=> hardirq/softirq \n"
3843 "# ||| / _--=> preempt-depth \n"
3844 "# |||| / delay \n"
3845 "# cmd pid ||||| time | caller \n"
3846 "# \\ / ||||| \\ | / \n");
3847 }
3848
3849 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3850 {
3851 unsigned long total;
3852 unsigned long entries;
3853
3854 get_total_entries(buf, &total, &entries);
3855 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
3856 entries, total, num_online_cpus());
3857 seq_puts(m, "#\n");
3858 }
3859
3860 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3861 unsigned int flags)
3862 {
3863 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3864
3865 print_event_info(buf, m);
3866
3867 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
3868 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
3869 }
3870
3871 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3872 unsigned int flags)
3873 {
3874 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3875 const char *space = " ";
3876 int prec = tgid ? 12 : 2;
3877
3878 print_event_info(buf, m);
3879
3880 seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
3881 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
3882 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
3883 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
3884 seq_printf(m, "# %.*s||| / delay\n", prec, space);
3885 seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID ");
3886 seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | ");
3887 }
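
/*
 * The "%.*s" conversions above print at most 'prec' characters of the
 * string argument, so the same format lines indent by either 2 or 12
 * columns depending on whether TGID recording is enabled. A minimal
 * sketch of the idiom with plain printf() and made-up values:
 *
 *	const char *pad = "            ";	// at least 12 spaces
 *	int prec = tgid ? 12 : 2;
 *
 *	printf("# %.*s _-----=> irqs-off\n", prec, pad);
 *	// prints 'prec' spaces between "# " and the column art
 */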
3888
3889 void
3890 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3891 {
3892 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3893 struct array_buffer *buf = iter->array_buffer;
3894 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3895 struct tracer *type = iter->trace;
3896 unsigned long entries;
3897 unsigned long total;
3898 const char *name = "preemption";
3899
3900 name = type->name;
3901
3902 get_total_entries(buf, &total, &entries);
3903
3904 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3905 name, UTS_RELEASE);
3906 seq_puts(m, "# -----------------------------------"
3907 "---------------------------------\n");
3908 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3909 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3910 nsecs_to_usecs(data->saved_latency),
3911 entries,
3912 total,
3913 buf->cpu,
3914 #if defined(CONFIG_PREEMPT_NONE)
3915 "server",
3916 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3917 "desktop",
3918 #elif defined(CONFIG_PREEMPT)
3919 "preempt",
3920 #elif defined(CONFIG_PREEMPT_RT)
3921 "preempt_rt",
3922 #else
3923 "unknown",
3924 #endif
3925 /* These are reserved for later use */
3926 0, 0, 0, 0);
3927 #ifdef CONFIG_SMP
3928 seq_printf(m, " #P:%d)\n", num_online_cpus());
3929 #else
3930 seq_puts(m, ")\n");
3931 #endif
3932 seq_puts(m, "# -----------------\n");
3933 seq_printf(m, "# | task: %.16s-%d "
3934 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3935 data->comm, data->pid,
3936 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3937 data->policy, data->rt_priority);
3938 seq_puts(m, "# -----------------\n");
3939
3940 if (data->critical_start) {
3941 seq_puts(m, "# => started at: ");
3942 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3943 trace_print_seq(m, &iter->seq);
3944 seq_puts(m, "\n# => ended at: ");
3945 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3946 trace_print_seq(m, &iter->seq);
3947 seq_puts(m, "\n#\n");
3948 }
3949
3950 seq_puts(m, "#\n");
3951 }
3952
3953 static void test_cpu_buff_start(struct trace_iterator *iter)
3954 {
3955 struct trace_seq *s = &iter->seq;
3956 struct trace_array *tr = iter->tr;
3957
3958 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3959 return;
3960
3961 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3962 return;
3963
3964 if (cpumask_available(iter->started) &&
3965 cpumask_test_cpu(iter->cpu, iter->started))
3966 return;
3967
3968 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3969 return;
3970
3971 if (cpumask_available(iter->started))
3972 cpumask_set_cpu(iter->cpu, iter->started);
3973
3974 /* Don't print started cpu buffer for the first entry of the trace */
3975 if (iter->idx > 1)
3976 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3977 iter->cpu);
3978 }
3979
3980 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3981 {
3982 struct trace_array *tr = iter->tr;
3983 struct trace_seq *s = &iter->seq;
3984 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3985 struct trace_entry *entry;
3986 struct trace_event *event;
3987
3988 entry = iter->ent;
3989
3990 test_cpu_buff_start(iter);
3991
3992 event = ftrace_find_event(entry->type);
3993
3994 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3995 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3996 trace_print_lat_context(iter);
3997 else
3998 trace_print_context(iter);
3999 }
4000
4001 if (trace_seq_has_overflowed(s))
4002 return TRACE_TYPE_PARTIAL_LINE;
4003
4004 if (event)
4005 return event->funcs->trace(iter, sym_flags, event);
4006
4007 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4008
4009 return trace_handle_return(s);
4010 }
4011
4012 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4013 {
4014 struct trace_array *tr = iter->tr;
4015 struct trace_seq *s = &iter->seq;
4016 struct trace_entry *entry;
4017 struct trace_event *event;
4018
4019 entry = iter->ent;
4020
4021 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4022 trace_seq_printf(s, "%d %d %llu ",
4023 entry->pid, iter->cpu, iter->ts);
4024
4025 if (trace_seq_has_overflowed(s))
4026 return TRACE_TYPE_PARTIAL_LINE;
4027
4028 event = ftrace_find_event(entry->type);
4029 if (event)
4030 return event->funcs->raw(iter, 0, event);
4031
4032 trace_seq_printf(s, "%d ?\n", entry->type);
4033
4034 return trace_handle_return(s);
4035 }
4036
4037 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4038 {
4039 struct trace_array *tr = iter->tr;
4040 struct trace_seq *s = &iter->seq;
4041 unsigned char newline = '\n';
4042 struct trace_entry *entry;
4043 struct trace_event *event;
4044
4045 entry = iter->ent;
4046
4047 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4048 SEQ_PUT_HEX_FIELD(s, entry->pid);
4049 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4050 SEQ_PUT_HEX_FIELD(s, iter->ts);
4051 if (trace_seq_has_overflowed(s))
4052 return TRACE_TYPE_PARTIAL_LINE;
4053 }
4054
4055 event = ftrace_find_event(entry->type);
4056 if (event) {
4057 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4058 if (ret != TRACE_TYPE_HANDLED)
4059 return ret;
4060 }
4061
4062 SEQ_PUT_FIELD(s, newline);
4063
4064 return trace_handle_return(s);
4065 }
4066
4067 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4068 {
4069 struct trace_array *tr = iter->tr;
4070 struct trace_seq *s = &iter->seq;
4071 struct trace_entry *entry;
4072 struct trace_event *event;
4073
4074 entry = iter->ent;
4075
4076 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4077 SEQ_PUT_FIELD(s, entry->pid);
4078 SEQ_PUT_FIELD(s, iter->cpu);
4079 SEQ_PUT_FIELD(s, iter->ts);
4080 if (trace_seq_has_overflowed(s))
4081 return TRACE_TYPE_PARTIAL_LINE;
4082 }
4083
4084 event = ftrace_find_event(entry->type);
4085 return event ? event->funcs->binary(iter, 0, event) :
4086 TRACE_TYPE_HANDLED;
4087 }
4088
4089 int trace_empty(struct trace_iterator *iter)
4090 {
4091 struct ring_buffer_iter *buf_iter;
4092 int cpu;
4093
4094 /* If we are looking at one CPU buffer, only check that one */
4095 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4096 cpu = iter->cpu_file;
4097 buf_iter = trace_buffer_iter(iter, cpu);
4098 if (buf_iter) {
4099 if (!ring_buffer_iter_empty(buf_iter))
4100 return 0;
4101 } else {
4102 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4103 return 0;
4104 }
4105 return 1;
4106 }
4107
4108 for_each_tracing_cpu(cpu) {
4109 buf_iter = trace_buffer_iter(iter, cpu);
4110 if (buf_iter) {
4111 if (!ring_buffer_iter_empty(buf_iter))
4112 return 0;
4113 } else {
4114 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4115 return 0;
4116 }
4117 }
4118
4119 return 1;
4120 }
4121
4122 /* Called with trace_event_read_lock() held. */
4123 enum print_line_t print_trace_line(struct trace_iterator *iter)
4124 {
4125 struct trace_array *tr = iter->tr;
4126 unsigned long trace_flags = tr->trace_flags;
4127 enum print_line_t ret;
4128
4129 if (iter->lost_events) {
4130 if (iter->lost_events == (unsigned long)-1)
4131 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4132 iter->cpu);
4133 else
4134 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4135 iter->cpu, iter->lost_events);
4136 if (trace_seq_has_overflowed(&iter->seq))
4137 return TRACE_TYPE_PARTIAL_LINE;
4138 }
4139
4140 if (iter->trace && iter->trace->print_line) {
4141 ret = iter->trace->print_line(iter);
4142 if (ret != TRACE_TYPE_UNHANDLED)
4143 return ret;
4144 }
4145
4146 if (iter->ent->type == TRACE_BPUTS &&
4147 trace_flags & TRACE_ITER_PRINTK &&
4148 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4149 return trace_print_bputs_msg_only(iter);
4150
4151 if (iter->ent->type == TRACE_BPRINT &&
4152 trace_flags & TRACE_ITER_PRINTK &&
4153 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4154 return trace_print_bprintk_msg_only(iter);
4155
4156 if (iter->ent->type == TRACE_PRINT &&
4157 trace_flags & TRACE_ITER_PRINTK &&
4158 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4159 return trace_print_printk_msg_only(iter);
4160
4161 if (trace_flags & TRACE_ITER_BIN)
4162 return print_bin_fmt(iter);
4163
4164 if (trace_flags & TRACE_ITER_HEX)
4165 return print_hex_fmt(iter);
4166
4167 if (trace_flags & TRACE_ITER_RAW)
4168 return print_raw_fmt(iter);
4169
4170 return print_trace_fmt(iter);
4171 }
4172
4173 void trace_latency_header(struct seq_file *m)
4174 {
4175 struct trace_iterator *iter = m->private;
4176 struct trace_array *tr = iter->tr;
4177
4178 /* print nothing if the buffers are empty */
4179 if (trace_empty(iter))
4180 return;
4181
4182 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4183 print_trace_header(m, iter);
4184
4185 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4186 print_lat_help_header(m);
4187 }
4188
4189 void trace_default_header(struct seq_file *m)
4190 {
4191 struct trace_iterator *iter = m->private;
4192 struct trace_array *tr = iter->tr;
4193 unsigned long trace_flags = tr->trace_flags;
4194
4195 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4196 return;
4197
4198 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4199 /* print nothing if the buffers are empty */
4200 if (trace_empty(iter))
4201 return;
4202 print_trace_header(m, iter);
4203 if (!(trace_flags & TRACE_ITER_VERBOSE))
4204 print_lat_help_header(m);
4205 } else {
4206 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4207 if (trace_flags & TRACE_ITER_IRQ_INFO)
4208 print_func_help_header_irq(iter->array_buffer,
4209 m, trace_flags);
4210 else
4211 print_func_help_header(iter->array_buffer, m,
4212 trace_flags);
4213 }
4214 }
4215 }
4216
4217 static void test_ftrace_alive(struct seq_file *m)
4218 {
4219 if (!ftrace_is_dead())
4220 return;
4221 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4222 "# MAY BE MISSING FUNCTION EVENTS\n");
4223 }
4224
4225 #ifdef CONFIG_TRACER_MAX_TRACE
4226 static void show_snapshot_main_help(struct seq_file *m)
4227 {
4228 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4229 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4230 "# Takes a snapshot of the main buffer.\n"
4231 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4232 "# (Doesn't have to be '2'; works with any number that\n"
4233 "# is not a '0' or '1')\n");
4234 }
4235
4236 static void show_snapshot_percpu_help(struct seq_file *m)
4237 {
4238 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4239 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4240 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4241 "# Takes a snapshot of the main buffer for this cpu.\n");
4242 #else
4243 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4244 "# Must use main snapshot file to allocate.\n");
4245 #endif
4246 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4247 "# (Doesn't have to be '2'; works with any number that\n"
4248 "# is not a '0' or '1')\n");
4249 }
4250
4251 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4252 {
4253 if (iter->tr->allocated_snapshot)
4254 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4255 else
4256 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4257
4258 seq_puts(m, "# Snapshot commands:\n");
4259 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4260 show_snapshot_main_help(m);
4261 else
4262 show_snapshot_percpu_help(m);
4263 }
4264 #else
4265 /* Should never be called */
4266 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4267 #endif
4268
4269 static int s_show(struct seq_file *m, void *v)
4270 {
4271 struct trace_iterator *iter = v;
4272 int ret;
4273
4274 if (iter->ent == NULL) {
4275 if (iter->tr) {
4276 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4277 seq_puts(m, "#\n");
4278 test_ftrace_alive(m);
4279 }
4280 if (iter->snapshot && trace_empty(iter))
4281 print_snapshot_help(m, iter);
4282 else if (iter->trace && iter->trace->print_header)
4283 iter->trace->print_header(m);
4284 else
4285 trace_default_header(m);
4286
4287 } else if (iter->leftover) {
4288 /*
4289 * If we filled the seq_file buffer earlier, we
4290 * want to just show it now.
4291 */
4292 ret = trace_print_seq(m, &iter->seq);
4293
4294 /* ret should this time be zero, but you never know */
4295 iter->leftover = ret;
4296
4297 } else {
4298 print_trace_line(iter);
4299 ret = trace_print_seq(m, &iter->seq);
4300 /*
4301 * If we overflow the seq_file buffer, then it will
4302 * ask us for this data again at start up.
4303 * Use that instead.
4304 * ret is 0 if seq_file write succeeded.
4305 * -1 otherwise.
4306 */
4307 iter->leftover = ret;
4308 }
4309
4310 return 0;
4311 }
4312
4313 /*
4314 * Should be used after trace_array_get(), trace_types_lock
4315 * ensures that i_cdev was already initialized.
4316 */
4317 static inline int tracing_get_cpu(struct inode *inode)
4318 {
4319 if (inode->i_cdev) /* See trace_create_cpu_file() */
4320 return (long)inode->i_cdev - 1;
4321 return RING_BUFFER_ALL_CPUS;
4322 }
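
/*
 * A minimal sketch of the encoding assumed above: the per-CPU file
 * creation side (see trace_create_cpu_file()) is expected to stash
 * "cpu + 1" in i_cdev, so a NULL i_cdev can still mean "all CPUs":
 *
 *	inode->i_cdev = (void *)(long)(cpu + 1);	// encode at create time
 *	cpu = (long)inode->i_cdev - 1;			// decode, as done above
 */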
4323
4324 static const struct seq_operations tracer_seq_ops = {
4325 .start = s_start,
4326 .next = s_next,
4327 .stop = s_stop,
4328 .show = s_show,
4329 };
4330
4331 static struct trace_iterator *
4332 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4333 {
4334 struct trace_array *tr = inode->i_private;
4335 struct trace_iterator *iter;
4336 int cpu;
4337
4338 if (tracing_disabled)
4339 return ERR_PTR(-ENODEV);
4340
4341 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4342 if (!iter)
4343 return ERR_PTR(-ENOMEM);
4344
4345 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4346 GFP_KERNEL);
4347 if (!iter->buffer_iter)
4348 goto release;
4349
4350 /*
4351 * trace_find_next_entry() may need to save off iter->ent.
4352 * It will place it into the iter->temp buffer. As most
4353 * events are less than 128, allocate a buffer of that size.
4354 * If one is greater, then trace_find_next_entry() will
4355 * allocate a new buffer to adjust for the bigger iter->ent.
4356 * It's not critical if it fails to get allocated here.
4357 */
4358 iter->temp = kmalloc(128, GFP_KERNEL);
4359 if (iter->temp)
4360 iter->temp_size = 128;
4361
4362 /*
4363 * We make a copy of the current tracer to avoid concurrent
4364 * changes on it while we are reading.
4365 */
4366 mutex_lock(&trace_types_lock);
4367 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4368 if (!iter->trace)
4369 goto fail;
4370
4371 *iter->trace = *tr->current_trace;
4372
4373 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4374 goto fail;
4375
4376 iter->tr = tr;
4377
4378 #ifdef CONFIG_TRACER_MAX_TRACE
4379 /* Currently only the top directory has a snapshot */
4380 if (tr->current_trace->print_max || snapshot)
4381 iter->array_buffer = &tr->max_buffer;
4382 else
4383 #endif
4384 iter->array_buffer = &tr->array_buffer;
4385 iter->snapshot = snapshot;
4386 iter->pos = -1;
4387 iter->cpu_file = tracing_get_cpu(inode);
4388 mutex_init(&iter->mutex);
4389
4390 /* Notify the tracer early; before we stop tracing. */
4391 if (iter->trace->open)
4392 iter->trace->open(iter);
4393
4394 /* Annotate start of buffers if we had overruns */
4395 if (ring_buffer_overruns(iter->array_buffer->buffer))
4396 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4397
4398 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4399 if (trace_clocks[tr->clock_id].in_ns)
4400 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4401
4402 /*
4403 * If pause-on-trace is enabled, then stop the trace while
4404 * dumping, unless this is the "snapshot" file
4405 */
4406 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4407 tracing_stop_tr(tr);
4408
4409 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4410 for_each_tracing_cpu(cpu) {
4411 iter->buffer_iter[cpu] =
4412 ring_buffer_read_prepare(iter->array_buffer->buffer,
4413 cpu, GFP_KERNEL);
4414 }
4415 ring_buffer_read_prepare_sync();
4416 for_each_tracing_cpu(cpu) {
4417 ring_buffer_read_start(iter->buffer_iter[cpu]);
4418 tracing_iter_reset(iter, cpu);
4419 }
4420 } else {
4421 cpu = iter->cpu_file;
4422 iter->buffer_iter[cpu] =
4423 ring_buffer_read_prepare(iter->array_buffer->buffer,
4424 cpu, GFP_KERNEL);
4425 ring_buffer_read_prepare_sync();
4426 ring_buffer_read_start(iter->buffer_iter[cpu]);
4427 tracing_iter_reset(iter, cpu);
4428 }
4429
4430 mutex_unlock(&trace_types_lock);
4431
4432 return iter;
4433
4434 fail:
4435 mutex_unlock(&trace_types_lock);
4436 kfree(iter->trace);
4437 kfree(iter->temp);
4438 kfree(iter->buffer_iter);
4439 release:
4440 seq_release_private(inode, file);
4441 return ERR_PTR(-ENOMEM);
4442 }
4443
4444 int tracing_open_generic(struct inode *inode, struct file *filp)
4445 {
4446 int ret;
4447
4448 ret = tracing_check_open_get_tr(NULL);
4449 if (ret)
4450 return ret;
4451
4452 filp->private_data = inode->i_private;
4453 return 0;
4454 }
4455
4456 bool tracing_is_disabled(void)
4457 {
4458 return (tracing_disabled) ? true : false;
4459 }
4460
4461 /*
4462 * Open and update trace_array ref count.
4463 * Must have the current trace_array passed to it.
4464 */
4465 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4466 {
4467 struct trace_array *tr = inode->i_private;
4468 int ret;
4469
4470 ret = tracing_check_open_get_tr(tr);
4471 if (ret)
4472 return ret;
4473
4474 filp->private_data = inode->i_private;
4475
4476 return 0;
4477 }
4478
4479 static int tracing_release(struct inode *inode, struct file *file)
4480 {
4481 struct trace_array *tr = inode->i_private;
4482 struct seq_file *m = file->private_data;
4483 struct trace_iterator *iter;
4484 int cpu;
4485
4486 if (!(file->f_mode & FMODE_READ)) {
4487 trace_array_put(tr);
4488 return 0;
4489 }
4490
4491 /* Writes do not use seq_file */
4492 iter = m->private;
4493 mutex_lock(&trace_types_lock);
4494
4495 for_each_tracing_cpu(cpu) {
4496 if (iter->buffer_iter[cpu])
4497 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4498 }
4499
4500 if (iter->trace && iter->trace->close)
4501 iter->trace->close(iter);
4502
4503 if (!iter->snapshot && tr->stop_count)
4504 /* reenable tracing if it was previously enabled */
4505 tracing_start_tr(tr);
4506
4507 __trace_array_put(tr);
4508
4509 mutex_unlock(&trace_types_lock);
4510
4511 mutex_destroy(&iter->mutex);
4512 free_cpumask_var(iter->started);
4513 kfree(iter->temp);
4514 kfree(iter->trace);
4515 kfree(iter->buffer_iter);
4516 seq_release_private(inode, file);
4517
4518 return 0;
4519 }
4520
4521 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4522 {
4523 struct trace_array *tr = inode->i_private;
4524
4525 trace_array_put(tr);
4526 return 0;
4527 }
4528
4529 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4530 {
4531 struct trace_array *tr = inode->i_private;
4532
4533 trace_array_put(tr);
4534
4535 return single_release(inode, file);
4536 }
4537
4538 static int tracing_open(struct inode *inode, struct file *file)
4539 {
4540 struct trace_array *tr = inode->i_private;
4541 struct trace_iterator *iter;
4542 int ret;
4543
4544 ret = tracing_check_open_get_tr(tr);
4545 if (ret)
4546 return ret;
4547
4548 /* If this file was open for write, then erase contents */
4549 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4550 int cpu = tracing_get_cpu(inode);
4551 struct array_buffer *trace_buf = &tr->array_buffer;
4552
4553 #ifdef CONFIG_TRACER_MAX_TRACE
4554 if (tr->current_trace->print_max)
4555 trace_buf = &tr->max_buffer;
4556 #endif
4557
4558 if (cpu == RING_BUFFER_ALL_CPUS)
4559 tracing_reset_online_cpus(trace_buf);
4560 else
4561 tracing_reset_cpu(trace_buf, cpu);
4562 }
4563
4564 if (file->f_mode & FMODE_READ) {
4565 iter = __tracing_open(inode, file, false);
4566 if (IS_ERR(iter))
4567 ret = PTR_ERR(iter);
4568 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4569 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4570 }
4571
4572 if (ret < 0)
4573 trace_array_put(tr);
4574
4575 return ret;
4576 }
4577
4578 /*
4579 * Some tracers are not suitable for instance buffers.
4580 * A tracer is always available for the global array (toplevel)
4581 * or if it explicitly states that it is.
4582 */
4583 static bool
4584 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4585 {
4586 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4587 }
4588
4589 /* Find the next tracer that this trace array may use */
4590 static struct tracer *
4591 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4592 {
4593 while (t && !trace_ok_for_array(t, tr))
4594 t = t->next;
4595
4596 return t;
4597 }
4598
4599 static void *
4600 t_next(struct seq_file *m, void *v, loff_t *pos)
4601 {
4602 struct trace_array *tr = m->private;
4603 struct tracer *t = v;
4604
4605 (*pos)++;
4606
4607 if (t)
4608 t = get_tracer_for_array(tr, t->next);
4609
4610 return t;
4611 }
4612
4613 static void *t_start(struct seq_file *m, loff_t *pos)
4614 {
4615 struct trace_array *tr = m->private;
4616 struct tracer *t;
4617 loff_t l = 0;
4618
4619 mutex_lock(&trace_types_lock);
4620
4621 t = get_tracer_for_array(tr, trace_types);
4622 for (; t && l < *pos; t = t_next(m, t, &l))
4623 ;
4624
4625 return t;
4626 }
4627
4628 static void t_stop(struct seq_file *m, void *p)
4629 {
4630 mutex_unlock(&trace_types_lock);
4631 }
4632
4633 static int t_show(struct seq_file *m, void *v)
4634 {
4635 struct tracer *t = v;
4636
4637 if (!t)
4638 return 0;
4639
4640 seq_puts(m, t->name);
4641 if (t->next)
4642 seq_putc(m, ' ');
4643 else
4644 seq_putc(m, '\n');
4645
4646 return 0;
4647 }
4648
4649 static const struct seq_operations show_traces_seq_ops = {
4650 .start = t_start,
4651 .next = t_next,
4652 .stop = t_stop,
4653 .show = t_show,
4654 };
4655
4656 static int show_traces_open(struct inode *inode, struct file *file)
4657 {
4658 struct trace_array *tr = inode->i_private;
4659 struct seq_file *m;
4660 int ret;
4661
4662 ret = tracing_check_open_get_tr(tr);
4663 if (ret)
4664 return ret;
4665
4666 ret = seq_open(file, &show_traces_seq_ops);
4667 if (ret) {
4668 trace_array_put(tr);
4669 return ret;
4670 }
4671
4672 m = file->private_data;
4673 m->private = tr;
4674
4675 return 0;
4676 }
4677
4678 static int show_traces_release(struct inode *inode, struct file *file)
4679 {
4680 struct trace_array *tr = inode->i_private;
4681
4682 trace_array_put(tr);
4683 return seq_release(inode, file);
4684 }
4685
4686 static ssize_t
4687 tracing_write_stub(struct file *filp, const char __user *ubuf,
4688 size_t count, loff_t *ppos)
4689 {
4690 return count;
4691 }
4692
4693 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4694 {
4695 int ret;
4696
4697 if (file->f_mode & FMODE_READ)
4698 ret = seq_lseek(file, offset, whence);
4699 else
4700 file->f_pos = ret = 0;
4701
4702 return ret;
4703 }
4704
4705 static const struct file_operations tracing_fops = {
4706 .open = tracing_open,
4707 .read = seq_read,
4708 .read_iter = seq_read_iter,
4709 .splice_read = generic_file_splice_read,
4710 .write = tracing_write_stub,
4711 .llseek = tracing_lseek,
4712 .release = tracing_release,
4713 };
4714
4715 static const struct file_operations show_traces_fops = {
4716 .open = show_traces_open,
4717 .read = seq_read,
4718 .llseek = seq_lseek,
4719 .release = show_traces_release,
4720 };
4721
4722 static ssize_t
4723 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4724 size_t count, loff_t *ppos)
4725 {
4726 struct trace_array *tr = file_inode(filp)->i_private;
4727 char *mask_str;
4728 int len;
4729
4730 len = snprintf(NULL, 0, "%*pb\n",
4731 cpumask_pr_args(tr->tracing_cpumask)) + 1;
4732 mask_str = kmalloc(len, GFP_KERNEL);
4733 if (!mask_str)
4734 return -ENOMEM;
4735
4736 len = snprintf(mask_str, len, "%*pb\n",
4737 cpumask_pr_args(tr->tracing_cpumask));
4738 if (len >= count) {
4739 count = -EINVAL;
4740 goto out_err;
4741 }
4742 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4743
4744 out_err:
4745 kfree(mask_str);
4746
4747 return count;
4748 }
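
/*
 * snprintf(NULL, 0, ...) above is the usual "measure, then allocate"
 * idiom: with a zero-sized buffer snprintf() only reports how long the
 * formatted output would be. A minimal sketch of the same pattern
 * ('value' is just a placeholder):
 *
 *	len = snprintf(NULL, 0, "%d\n", value) + 1;	// +1 for the '\0'
 *	buf = kmalloc(len, GFP_KERNEL);
 *	if (buf)
 *		snprintf(buf, len, "%d\n", value);
 */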
4749
4750 int tracing_set_cpumask(struct trace_array *tr,
4751 cpumask_var_t tracing_cpumask_new)
4752 {
4753 int cpu;
4754
4755 if (!tr)
4756 return -EINVAL;
4757
4758 local_irq_disable();
4759 arch_spin_lock(&tr->max_lock);
4760 for_each_tracing_cpu(cpu) {
4761 /*
4762 * Increase/decrease the disabled counter if we are
4763 * about to flip a bit in the cpumask:
4764 */
4765 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4766 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4767 atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4768 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4769 }
4770 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4771 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4772 atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4773 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4774 }
4775 }
4776 arch_spin_unlock(&tr->max_lock);
4777 local_irq_enable();
4778
4779 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4780
4781 return 0;
4782 }
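
/*
 * From user space this is normally driven by writing a hex mask to the
 * tracing_cpumask file (path assumes tracefs is mounted at
 * /sys/kernel/tracing):
 *
 *	# echo 3 > /sys/kernel/tracing/tracing_cpumask   (trace CPUs 0 and 1 only)
 *	# cat /sys/kernel/tracing/tracing_cpumask
 *
 * CPUs whose bit is cleared have recording disabled via
 * ring_buffer_record_disable_cpu(); setting the bit re-enables them.
 */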
4783
4784 static ssize_t
4785 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4786 size_t count, loff_t *ppos)
4787 {
4788 struct trace_array *tr = file_inode(filp)->i_private;
4789 cpumask_var_t tracing_cpumask_new;
4790 int err;
4791
4792 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4793 return -ENOMEM;
4794
4795 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4796 if (err)
4797 goto err_free;
4798
4799 err = tracing_set_cpumask(tr, tracing_cpumask_new);
4800 if (err)
4801 goto err_free;
4802
4803 free_cpumask_var(tracing_cpumask_new);
4804
4805 return count;
4806
4807 err_free:
4808 free_cpumask_var(tracing_cpumask_new);
4809
4810 return err;
4811 }
4812
4813 static const struct file_operations tracing_cpumask_fops = {
4814 .open = tracing_open_generic_tr,
4815 .read = tracing_cpumask_read,
4816 .write = tracing_cpumask_write,
4817 .release = tracing_release_generic_tr,
4818 .llseek = generic_file_llseek,
4819 };
4820
4821 static int tracing_trace_options_show(struct seq_file *m, void *v)
4822 {
4823 struct tracer_opt *trace_opts;
4824 struct trace_array *tr = m->private;
4825 u32 tracer_flags;
4826 int i;
4827
4828 mutex_lock(&trace_types_lock);
4829 tracer_flags = tr->current_trace->flags->val;
4830 trace_opts = tr->current_trace->flags->opts;
4831
4832 for (i = 0; trace_options[i]; i++) {
4833 if (tr->trace_flags & (1 << i))
4834 seq_printf(m, "%s\n", trace_options[i]);
4835 else
4836 seq_printf(m, "no%s\n", trace_options[i]);
4837 }
4838
4839 for (i = 0; trace_opts[i].name; i++) {
4840 if (tracer_flags & trace_opts[i].bit)
4841 seq_printf(m, "%s\n", trace_opts[i].name);
4842 else
4843 seq_printf(m, "no%s\n", trace_opts[i].name);
4844 }
4845 mutex_unlock(&trace_types_lock);
4846
4847 return 0;
4848 }
4849
4850 static int __set_tracer_option(struct trace_array *tr,
4851 struct tracer_flags *tracer_flags,
4852 struct tracer_opt *opts, int neg)
4853 {
4854 struct tracer *trace = tracer_flags->trace;
4855 int ret;
4856
4857 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4858 if (ret)
4859 return ret;
4860
4861 if (neg)
4862 tracer_flags->val &= ~opts->bit;
4863 else
4864 tracer_flags->val |= opts->bit;
4865 return 0;
4866 }
4867
4868 /* Try to assign a tracer specific option */
4869 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4870 {
4871 struct tracer *trace = tr->current_trace;
4872 struct tracer_flags *tracer_flags = trace->flags;
4873 struct tracer_opt *opts = NULL;
4874 int i;
4875
4876 for (i = 0; tracer_flags->opts[i].name; i++) {
4877 opts = &tracer_flags->opts[i];
4878
4879 if (strcmp(cmp, opts->name) == 0)
4880 return __set_tracer_option(tr, trace->flags, opts, neg);
4881 }
4882
4883 return -EINVAL;
4884 }
4885
4886 /* Some tracers require overwrite to stay enabled */
4887 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4888 {
4889 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4890 return -1;
4891
4892 return 0;
4893 }
4894
4895 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4896 {
4897 int *map;
4898
4899 if ((mask == TRACE_ITER_RECORD_TGID) ||
4900 (mask == TRACE_ITER_RECORD_CMD))
4901 lockdep_assert_held(&event_mutex);
4902
4903 /* do nothing if flag is already set */
4904 if (!!(tr->trace_flags & mask) == !!enabled)
4905 return 0;
4906
4907 /* Give the tracer a chance to approve the change */
4908 if (tr->current_trace->flag_changed)
4909 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4910 return -EINVAL;
4911
4912 if (enabled)
4913 tr->trace_flags |= mask;
4914 else
4915 tr->trace_flags &= ~mask;
4916
4917 if (mask == TRACE_ITER_RECORD_CMD)
4918 trace_event_enable_cmd_record(enabled);
4919
4920 if (mask == TRACE_ITER_RECORD_TGID) {
4921 if (!tgid_map) {
4922 tgid_map_max = pid_max;
4923 map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
4924 GFP_KERNEL);
4925
4926 /*
4927 * Pairs with smp_load_acquire() in
4928 * trace_find_tgid_ptr() to ensure that if it observes
4929 * the tgid_map we just allocated then it also observes
4930 * the corresponding tgid_map_max value.
4931 */
4932 smp_store_release(&tgid_map, map);
4933 }
4934 if (!tgid_map) {
4935 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4936 return -ENOMEM;
4937 }
4938
4939 trace_event_enable_tgid_record(enabled);
4940 }
4941
4942 if (mask == TRACE_ITER_EVENT_FORK)
4943 trace_event_follow_fork(tr, enabled);
4944
4945 if (mask == TRACE_ITER_FUNC_FORK)
4946 ftrace_pid_follow_fork(tr, enabled);
4947
4948 if (mask == TRACE_ITER_OVERWRITE) {
4949 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4950 #ifdef CONFIG_TRACER_MAX_TRACE
4951 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4952 #endif
4953 }
4954
4955 if (mask == TRACE_ITER_PRINTK) {
4956 trace_printk_start_stop_comm(enabled);
4957 trace_printk_control(enabled);
4958 }
4959
4960 return 0;
4961 }
4962
4963 int trace_set_options(struct trace_array *tr, char *option)
4964 {
4965 char *cmp;
4966 int neg = 0;
4967 int ret;
4968 size_t orig_len = strlen(option);
4969 int len;
4970
4971 cmp = strstrip(option);
4972
4973 len = str_has_prefix(cmp, "no");
4974 if (len)
4975 neg = 1;
4976
4977 cmp += len;
4978
4979 mutex_lock(&event_mutex);
4980 mutex_lock(&trace_types_lock);
4981
4982 ret = match_string(trace_options, -1, cmp);
4983 /* If no option could be set, test the specific tracer options */
4984 if (ret < 0)
4985 ret = set_tracer_option(tr, cmp, neg);
4986 else
4987 ret = set_tracer_flag(tr, 1 << ret, !neg);
4988
4989 mutex_unlock(&trace_types_lock);
4990 mutex_unlock(&event_mutex);
4991
4992 /*
4993 * If the first trailing whitespace is replaced with '\0' by strstrip,
4994 * turn it back into a space.
4995 */
4996 if (orig_len > strlen(option))
4997 option[strlen(option)] = ' ';
4998
4999 return ret;
5000 }
5001
5002 static void __init apply_trace_boot_options(void)
5003 {
5004 char *buf = trace_boot_options_buf;
5005 char *option;
5006
5007 while (true) {
5008 option = strsep(&buf, ",");
5009
5010 if (!option)
5011 break;
5012
5013 if (*option)
5014 trace_set_options(&global_trace, option);
5015
5016 /* Put back the comma to allow this to be called again */
5017 if (buf)
5018 *(buf - 1) = ',';
5019 }
5020 }
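
/*
 * strsep() consumes the option buffer in place: it returns the current
 * token, overwrites the delimiter with '\0', advances the cursor, and
 * returns NULL once the string is exhausted. A minimal sketch with a
 * made-up buffer and a hypothetical handle() helper:
 *
 *	char buf[] = "sym-offset,noirq-info";
 *	char *cur = buf, *opt;
 *
 *	while ((opt = strsep(&cur, ",")) != NULL)
 *		if (*opt)			// skip empty tokens
 *			handle(opt);		// "sym-offset", then "noirq-info"
 *
 * apply_trace_boot_options() additionally puts each consumed ',' back
 * so the boot buffer can be parsed again on a later call.
 */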
5021
5022 static ssize_t
5023 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5024 size_t cnt, loff_t *ppos)
5025 {
5026 struct seq_file *m = filp->private_data;
5027 struct trace_array *tr = m->private;
5028 char buf[64];
5029 int ret;
5030
5031 if (cnt >= sizeof(buf))
5032 return -EINVAL;
5033
5034 if (copy_from_user(buf, ubuf, cnt))
5035 return -EFAULT;
5036
5037 buf[cnt] = 0;
5038
5039 ret = trace_set_options(tr, buf);
5040 if (ret < 0)
5041 return ret;
5042
5043 *ppos += cnt;
5044
5045 return cnt;
5046 }
5047
5048 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5049 {
5050 struct trace_array *tr = inode->i_private;
5051 int ret;
5052
5053 ret = tracing_check_open_get_tr(tr);
5054 if (ret)
5055 return ret;
5056
5057 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5058 if (ret < 0)
5059 trace_array_put(tr);
5060
5061 return ret;
5062 }
5063
5064 static const struct file_operations tracing_iter_fops = {
5065 .open = tracing_trace_options_open,
5066 .read = seq_read,
5067 .llseek = seq_lseek,
5068 .release = tracing_single_release_tr,
5069 .write = tracing_trace_options_write,
5070 };
5071
5072 static const char readme_msg[] =
5073 "tracing mini-HOWTO:\n\n"
5074 "# echo 0 > tracing_on : quick way to disable tracing\n"
5075 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5076 " Important files:\n"
5077 " trace\t\t\t- The static contents of the buffer\n"
5078 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5079 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5080 " current_tracer\t- function and latency tracers\n"
5081 " available_tracers\t- list of configured tracers for current_tracer\n"
5082 " error_log\t- error log for failed commands (that support it)\n"
5083 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5084 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5085 " trace_clock\t\t- change the clock used to order events\n"
5086 " local: Per cpu clock but may not be synced across CPUs\n"
5087 " global: Synced across CPUs but slows tracing down.\n"
5088 " counter: Not a clock, but just an increment\n"
5089 " uptime: Jiffy counter from time of boot\n"
5090 " perf: Same clock that perf events use\n"
5091 #ifdef CONFIG_X86_64
5092 " x86-tsc: TSC cycle counter\n"
5093 #endif
5094 "\n timestamp_mode\t- view the mode used to timestamp events\n"
5095 " delta: Delta difference against a buffer-wide timestamp\n"
5096 " absolute: Absolute (standalone) timestamp\n"
5097 "\n trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
5098 "\n trace_marker_raw\t\t- Writing into this file writes binary data into the kernel buffer\n"
5099 " tracing_cpumask\t- Limit which CPUs to trace\n"
5100 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5101 "\t\t\t Remove sub-buffer with rmdir\n"
5102 " trace_options\t\t- Set format or modify how tracing happens\n"
5103 "\t\t\t Disable an option by prefixing 'no' to the\n"
5104 "\t\t\t option name\n"
5105 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5106 #ifdef CONFIG_DYNAMIC_FTRACE
5107 "\n available_filter_functions - list of functions that can be filtered on\n"
5108 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5109 "\t\t\t functions\n"
5110 "\t accepts: func_full_name or glob-matching-pattern\n"
5111 "\t modules: Can select a group via module\n"
5112 "\t Format: :mod:<module-name>\n"
5113 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5114 "\t triggers: a command to perform when function is hit\n"
5115 "\t Format: <function>:<trigger>[:count]\n"
5116 "\t trigger: traceon, traceoff\n"
5117 "\t\t enable_event:<system>:<event>\n"
5118 "\t\t disable_event:<system>:<event>\n"
5119 #ifdef CONFIG_STACKTRACE
5120 "\t\t stacktrace\n"
5121 #endif
5122 #ifdef CONFIG_TRACER_SNAPSHOT
5123 "\t\t snapshot\n"
5124 #endif
5125 "\t\t dump\n"
5126 "\t\t cpudump\n"
5127 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5128 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5129 "\t The first one will disable tracing every time do_fault is hit\n"
5130 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5131 "\t The first time do_trap is hit and it disables tracing, the\n"
5132 "\t counter will decrement to 2. If tracing is already disabled,\n"
5133 "\t the counter will not decrement. It only decrements when the\n"
5134 "\t trigger did work\n"
5135 "\t To remove trigger without count:\n"
5136 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
5137 "\t To remove trigger with a count:\n"
5138 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5139 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5140 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5141 "\t modules: Can select a group via module command :mod:\n"
5142 "\t Does not accept triggers\n"
5143 #endif /* CONFIG_DYNAMIC_FTRACE */
5144 #ifdef CONFIG_FUNCTION_TRACER
5145 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5146 "\t\t (function)\n"
5147 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5148 "\t\t (function)\n"
5149 #endif
5150 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5151 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5152 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5153 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5154 #endif
5155 #ifdef CONFIG_TRACER_SNAPSHOT
5156 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5157 "\t\t\t snapshot buffer. Read the contents for more\n"
5158 "\t\t\t information\n"
5159 #endif
5160 #ifdef CONFIG_STACK_TRACER
5161 " stack_trace\t\t- Shows the max stack trace when active\n"
5162 " stack_max_size\t- Shows current max stack size that was traced\n"
5163 "\t\t\t Write into this file to reset the max size (trigger a\n"
5164 "\t\t\t new trace)\n"
5165 #ifdef CONFIG_DYNAMIC_FTRACE
5166 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5167 "\t\t\t traces\n"
5168 #endif
5169 #endif /* CONFIG_STACK_TRACER */
5170 #ifdef CONFIG_DYNAMIC_EVENTS
5171 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5172 "\t\t\t Write into this file to define/undefine new trace events.\n"
5173 #endif
5174 #ifdef CONFIG_KPROBE_EVENTS
5175 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5176 "\t\t\t Write into this file to define/undefine new trace events.\n"
5177 #endif
5178 #ifdef CONFIG_UPROBE_EVENTS
5179 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5180 "\t\t\t Write into this file to define/undefine new trace events.\n"
5181 #endif
5182 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5183 "\t accepts: event-definitions (one definition per line)\n"
5184 "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
5185 "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5186 #ifdef CONFIG_HIST_TRIGGERS
5187 "\t s:[synthetic/]<event> <field> [<field>]\n"
5188 #endif
5189 "\t -:[<group>/]<event>\n"
5190 #ifdef CONFIG_KPROBE_EVENTS
5191 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5192 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5193 #endif
5194 #ifdef CONFIG_UPROBE_EVENTS
5195 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5196 #endif
5197 "\t args: <name>=fetcharg[:type]\n"
5198 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5199 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5200 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5201 #else
5202 "\t $stack<index>, $stack, $retval, $comm,\n"
5203 #endif
5204 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5205 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5206 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5207 "\t <type>\\[<array-size>\\]\n"
5208 #ifdef CONFIG_HIST_TRIGGERS
5209 "\t field: <stype> <name>;\n"
5210 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5211 "\t [unsigned] char/int/long\n"
5212 #endif
5213 #endif
5214 " events/\t\t- Directory containing all trace event subsystems:\n"
5215 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5216 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5217 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5218 "\t\t\t events\n"
5219 " filter\t\t- If set, only events passing filter are traced\n"
5220 " events/<system>/<event>/\t- Directory containing control files for\n"
5221 "\t\t\t <event>:\n"
5222 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5223 " filter\t\t- If set, only events passing filter are traced\n"
5224 " trigger\t\t- If set, a command to perform when event is hit\n"
5225 "\t Format: <trigger>[:count][if <filter>]\n"
5226 "\t trigger: traceon, traceoff\n"
5227 "\t enable_event:<system>:<event>\n"
5228 "\t disable_event:<system>:<event>\n"
5229 #ifdef CONFIG_HIST_TRIGGERS
5230 "\t enable_hist:<system>:<event>\n"
5231 "\t disable_hist:<system>:<event>\n"
5232 #endif
5233 #ifdef CONFIG_STACKTRACE
5234 "\t\t stacktrace\n"
5235 #endif
5236 #ifdef CONFIG_TRACER_SNAPSHOT
5237 "\t\t snapshot\n"
5238 #endif
5239 #ifdef CONFIG_HIST_TRIGGERS
5240 "\t\t hist (see below)\n"
5241 #endif
5242 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5243 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5244 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5245 "\t events/block/block_unplug/trigger\n"
5246 "\t The first disables tracing every time block_unplug is hit.\n"
5247 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5248 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5249 "\t is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5250 "\t Like function triggers, the counter is only decremented if it\n"
5251 "\t enabled or disabled tracing.\n"
5252 "\t To remove a trigger without a count:\n"
5253 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5254 "\t To remove a trigger with a count:\n"
5255 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5256 "\t Filters can be ignored when removing a trigger.\n"
5257 #ifdef CONFIG_HIST_TRIGGERS
5258 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5259 "\t Format: hist:keys=<field1[,field2,...]>\n"
5260 "\t [:values=<field1[,field2,...]>]\n"
5261 "\t [:sort=<field1[,field2,...]>]\n"
5262 "\t [:size=#entries]\n"
5263 "\t [:pause][:continue][:clear]\n"
5264 "\t [:name=histname1]\n"
5265 "\t [:<handler>.<action>]\n"
5266 "\t [if <filter>]\n\n"
5267 "\t Note, special fields can be used as well:\n"
5268 "\t common_timestamp - to record current timestamp\n"
5269 "\t common_cpu - to record the CPU the event happened on\n"
5270 "\n"
5271 "\t When a matching event is hit, an entry is added to a hash\n"
5272 "\t table using the key(s) and value(s) named, and the value of a\n"
5273 "\t sum called 'hitcount' is incremented. Keys and values\n"
5274 "\t correspond to fields in the event's format description. Keys\n"
5275 "\t can be any field, or the special string 'stacktrace'.\n"
5276 "\t Compound keys consisting of up to two fields can be specified\n"
5277 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5278 "\t fields. Sort keys consisting of up to two fields can be\n"
5279 "\t specified using the 'sort' keyword. The sort direction can\n"
5280 "\t be modified by appending '.descending' or '.ascending' to a\n"
5281 "\t sort field. The 'size' parameter can be used to specify more\n"
5282 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5283 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5284 "\t its histogram data will be shared with other triggers of the\n"
5285 "\t same name, and trigger hits will update this common data.\n\n"
5286 "\t Reading the 'hist' file for the event will dump the hash\n"
5287 "\t table in its entirety to stdout. If there are multiple hist\n"
5288 "\t triggers attached to an event, there will be a table for each\n"
5289 "\t trigger in the output. The table displayed for a named\n"
5290 "\t trigger will be the same as any other instance having the\n"
5291 "\t same name. The default format used to display a given field\n"
5292 "\t can be modified by appending any of the following modifiers\n"
5293 "\t to the field name, as applicable:\n\n"
5294 "\t .hex display a number as a hex value\n"
5295 "\t .sym display an address as a symbol\n"
5296 "\t .sym-offset display an address as a symbol and offset\n"
5297 "\t .execname display a common_pid as a program name\n"
5298 "\t .syscall display a syscall id as a syscall name\n"
5299 "\t .log2 display log2 value rather than raw number\n"
5300 "\t .usecs display a common_timestamp in microseconds\n\n"
5301 "\t The 'pause' parameter can be used to pause an existing hist\n"
5302 "\t trigger or to start a hist trigger but not log any events\n"
5303 "\t until told to do so. 'continue' can be used to start or\n"
5304 "\t restart a paused hist trigger.\n\n"
5305 "\t The 'clear' parameter will clear the contents of a running\n"
5306 "\t hist trigger and leave its current paused/active state\n"
5307 "\t unchanged.\n\n"
5308 "\t The enable_hist and disable_hist triggers can be used to\n"
5309 "\t have one event conditionally start and stop another event's\n"
5310 "\t already-attached hist trigger. The syntax is analogous to\n"
5311 "\t the enable_event and disable_event triggers.\n\n"
5312 "\t Hist trigger handlers and actions are executed whenever a\n"
5313 "\t histogram entry is added or updated. They take the form:\n\n"
5314 "\t <handler>.<action>\n\n"
5315 "\t The available handlers are:\n\n"
5316 "\t onmatch(matching.event) - invoke on addition or update\n"
5317 "\t onmax(var) - invoke if var exceeds current max\n"
5318 "\t onchange(var) - invoke action if var changes\n\n"
5319 "\t The available actions are:\n\n"
5320 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5321 "\t save(field,...) - save current event fields\n"
5322 #ifdef CONFIG_TRACER_SNAPSHOT
5323 "\t snapshot() - snapshot the trace buffer\n\n"
5324 #endif
5325 #ifdef CONFIG_SYNTH_EVENTS
5326 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5327 "\t Write into this file to define/undefine new synthetic events.\n"
5328 "\t example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5329 #endif
5330 #endif
5331 ;
5332
5333 static ssize_t
5334 tracing_readme_read(struct file *filp, char __user *ubuf,
5335 size_t cnt, loff_t *ppos)
5336 {
5337 return simple_read_from_buffer(ubuf, cnt, ppos,
5338 readme_msg, strlen(readme_msg));
5339 }
5340
5341 static const struct file_operations tracing_readme_fops = {
5342 .open = tracing_open_generic,
5343 .read = tracing_readme_read,
5344 .llseek = generic_file_llseek,
5345 };
5346
5347 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5348 {
5349 int pid = ++(*pos);
5350
5351 return trace_find_tgid_ptr(pid);
5352 }
5353
5354 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5355 {
5356 int pid = *pos;
5357
5358 return trace_find_tgid_ptr(pid);
5359 }
5360
5361 static void saved_tgids_stop(struct seq_file *m, void *v)
5362 {
5363 }
5364
5365 static int saved_tgids_show(struct seq_file *m, void *v)
5366 {
5367 int *entry = (int *)v;
5368 int pid = entry - tgid_map;
5369 int tgid = *entry;
5370
5371 if (tgid == 0)
5372 return SEQ_SKIP;
5373
5374 seq_printf(m, "%d %d\n", pid, tgid);
5375 return 0;
5376 }
5377
5378 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5379 .start = saved_tgids_start,
5380 .stop = saved_tgids_stop,
5381 .next = saved_tgids_next,
5382 .show = saved_tgids_show,
5383 };
5384
5385 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5386 {
5387 int ret;
5388
5389 ret = tracing_check_open_get_tr(NULL);
5390 if (ret)
5391 return ret;
5392
5393 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5394 }
5395
5396
5397 static const struct file_operations tracing_saved_tgids_fops = {
5398 .open = tracing_saved_tgids_open,
5399 .read = seq_read,
5400 .llseek = seq_lseek,
5401 .release = seq_release,
5402 };
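/*
 * Sketch of expected usage (assuming tracefs is mounted at
 * /sys/kernel/tracing): reading the saved_tgids file dumps one
 * "<pid> <tgid>" pair per line for every pid with a recorded tgid,
 * as emitted by saved_tgids_show() above:
 *
 *   cat /sys/kernel/tracing/saved_tgids
 */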
5403
5404 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5405 {
5406 unsigned int *ptr = v;
5407
5408 if (*pos || m->count)
5409 ptr++;
5410
5411 (*pos)++;
5412
5413 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5414 ptr++) {
5415 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5416 continue;
5417
5418 return ptr;
5419 }
5420
5421 return NULL;
5422 }
5423
5424 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5425 {
5426 void *v;
5427 loff_t l = 0;
5428
5429 preempt_disable();
5430 arch_spin_lock(&trace_cmdline_lock);
5431
5432 v = &savedcmd->map_cmdline_to_pid[0];
5433 while (l <= *pos) {
5434 v = saved_cmdlines_next(m, v, &l);
5435 if (!v)
5436 return NULL;
5437 }
5438
5439 return v;
5440 }
5441
5442 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5443 {
5444 arch_spin_unlock(&trace_cmdline_lock);
5445 preempt_enable();
5446 }
5447
5448 static int saved_cmdlines_show(struct seq_file *m, void *v)
5449 {
5450 char buf[TASK_COMM_LEN];
5451 unsigned int *pid = v;
5452
5453 __trace_find_cmdline(*pid, buf);
5454 seq_printf(m, "%d %s\n", *pid, buf);
5455 return 0;
5456 }
5457
5458 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5459 .start = saved_cmdlines_start,
5460 .next = saved_cmdlines_next,
5461 .stop = saved_cmdlines_stop,
5462 .show = saved_cmdlines_show,
5463 };
5464
5465 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5466 {
5467 int ret;
5468
5469 ret = tracing_check_open_get_tr(NULL);
5470 if (ret)
5471 return ret;
5472
5473 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5474 }
5475
5476 static const struct file_operations tracing_saved_cmdlines_fops = {
5477 .open = tracing_saved_cmdlines_open,
5478 .read = seq_read,
5479 .llseek = seq_lseek,
5480 .release = seq_release,
5481 };
5482
5483 static ssize_t
5484 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5485 size_t cnt, loff_t *ppos)
5486 {
5487 char buf[64];
5488 int r;
5489
5490 preempt_disable();
5491 arch_spin_lock(&trace_cmdline_lock);
5492 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5493 arch_spin_unlock(&trace_cmdline_lock);
5494 preempt_enable();
5495
5496 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5497 }
5498
5499 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5500 {
5501 kfree(s->saved_cmdlines);
5502 kfree(s->map_cmdline_to_pid);
5503 kfree(s);
5504 }
5505
5506 static int tracing_resize_saved_cmdlines(unsigned int val)
5507 {
5508 struct saved_cmdlines_buffer *s, *savedcmd_temp;
5509
5510 s = kmalloc(sizeof(*s), GFP_KERNEL);
5511 if (!s)
5512 return -ENOMEM;
5513
5514 if (allocate_cmdlines_buffer(val, s) < 0) {
5515 kfree(s);
5516 return -ENOMEM;
5517 }
5518
5519 preempt_disable();
5520 arch_spin_lock(&trace_cmdline_lock);
5521 savedcmd_temp = savedcmd;
5522 savedcmd = s;
5523 arch_spin_unlock(&trace_cmdline_lock);
5524 preempt_enable();
5525 free_saved_cmdlines_buffer(savedcmd_temp);
5526
5527 return 0;
5528 }
5529
5530 static ssize_t
5531 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5532 size_t cnt, loff_t *ppos)
5533 {
5534 unsigned long val;
5535 int ret;
5536
5537 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5538 if (ret)
5539 return ret;
5540
5541 /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5542 if (!val || val > PID_MAX_DEFAULT)
5543 return -EINVAL;
5544
5545 ret = tracing_resize_saved_cmdlines((unsigned int)val);
5546 if (ret < 0)
5547 return ret;
5548
5549 *ppos += cnt;
5550
5551 return cnt;
5552 }
5553
5554 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5555 .open = tracing_open_generic,
5556 .read = tracing_saved_cmdlines_size_read,
5557 .write = tracing_saved_cmdlines_size_write,
5558 };
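/*
 * Sketch of expected usage (assuming tracefs is mounted at
 * /sys/kernel/tracing): the saved_cmdlines_size file reports and resizes
 * the pid->comm cache handled above. Per tracing_saved_cmdlines_size_write(),
 * a written value must be at least 1 and no larger than PID_MAX_DEFAULT:
 *
 *   cat /sys/kernel/tracing/saved_cmdlines_size
 *   echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 */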
5559
5560 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5561 static union trace_eval_map_item *
5562 update_eval_map(union trace_eval_map_item *ptr)
5563 {
5564 if (!ptr->map.eval_string) {
5565 if (ptr->tail.next) {
5566 ptr = ptr->tail.next;
5567 /* Set ptr to the next real item (skip head) */
5568 ptr++;
5569 } else
5570 return NULL;
5571 }
5572 return ptr;
5573 }
5574
5575 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5576 {
5577 union trace_eval_map_item *ptr = v;
5578
5579 /*
5580 * Paranoid! If ptr points to end, we don't want to increment past it.
5581 * This really should never happen.
5582 */
5583 (*pos)++;
5584 ptr = update_eval_map(ptr);
5585 if (WARN_ON_ONCE(!ptr))
5586 return NULL;
5587
5588 ptr++;
5589 ptr = update_eval_map(ptr);
5590
5591 return ptr;
5592 }
5593
5594 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5595 {
5596 union trace_eval_map_item *v;
5597 loff_t l = 0;
5598
5599 mutex_lock(&trace_eval_mutex);
5600
5601 v = trace_eval_maps;
5602 if (v)
5603 v++;
5604
5605 while (v && l < *pos) {
5606 v = eval_map_next(m, v, &l);
5607 }
5608
5609 return v;
5610 }
5611
5612 static void eval_map_stop(struct seq_file *m, void *v)
5613 {
5614 mutex_unlock(&trace_eval_mutex);
5615 }
5616
5617 static int eval_map_show(struct seq_file *m, void *v)
5618 {
5619 union trace_eval_map_item *ptr = v;
5620
5621 seq_printf(m, "%s %ld (%s)\n",
5622 ptr->map.eval_string, ptr->map.eval_value,
5623 ptr->map.system);
5624
5625 return 0;
5626 }
5627
5628 static const struct seq_operations tracing_eval_map_seq_ops = {
5629 .start = eval_map_start,
5630 .next = eval_map_next,
5631 .stop = eval_map_stop,
5632 .show = eval_map_show,
5633 };
5634
5635 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5636 {
5637 int ret;
5638
5639 ret = tracing_check_open_get_tr(NULL);
5640 if (ret)
5641 return ret;
5642
5643 return seq_open(filp, &tracing_eval_map_seq_ops);
5644 }
5645
5646 static const struct file_operations tracing_eval_map_fops = {
5647 .open = tracing_eval_map_open,
5648 .read = seq_read,
5649 .llseek = seq_lseek,
5650 .release = seq_release,
5651 };
5652
5653 static inline union trace_eval_map_item *
5654 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5655 {
5656 /* Return tail of array given the head */
5657 return ptr + ptr->head.length + 1;
5658 }
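/*
 * Layout sketch of one trace_eval_map_item array as built by
 * trace_insert_eval_map_file() below (len + 2 entries total):
 *
 *   [0]         head  (module pointer and map count 'len')
 *   [1..len]    map   (one trace_eval_map copied per entry)
 *   [len + 1]   tail  (zeroed; tail.next links to the next array)
 *
 * trace_eval_jmp_to_tail() above simply skips from the head to the
 * tail entry, i.e. ptr + head.length + 1.
 */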
5659
5660 static void
5661 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5662 int len)
5663 {
5664 struct trace_eval_map **stop;
5665 struct trace_eval_map **map;
5666 union trace_eval_map_item *map_array;
5667 union trace_eval_map_item *ptr;
5668
5669 stop = start + len;
5670
5671 /*
5672 * The trace_eval_maps contains the map plus a head and tail item,
5673 * where the head holds the module and length of array, and the
5674 * tail holds a pointer to the next list.
5675 */
5676 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5677 if (!map_array) {
5678 pr_warn("Unable to allocate trace eval mapping\n");
5679 return;
5680 }
5681
5682 mutex_lock(&trace_eval_mutex);
5683
5684 if (!trace_eval_maps)
5685 trace_eval_maps = map_array;
5686 else {
5687 ptr = trace_eval_maps;
5688 for (;;) {
5689 ptr = trace_eval_jmp_to_tail(ptr);
5690 if (!ptr->tail.next)
5691 break;
5692 ptr = ptr->tail.next;
5693
5694 }
5695 ptr->tail.next = map_array;
5696 }
5697 map_array->head.mod = mod;
5698 map_array->head.length = len;
5699 map_array++;
5700
5701 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5702 map_array->map = **map;
5703 map_array++;
5704 }
5705 memset(map_array, 0, sizeof(*map_array));
5706
5707 mutex_unlock(&trace_eval_mutex);
5708 }
5709
5710 static void trace_create_eval_file(struct dentry *d_tracer)
5711 {
5712 trace_create_file("eval_map", 0444, d_tracer,
5713 NULL, &tracing_eval_map_fops);
5714 }
5715
5716 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5717 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5718 static inline void trace_insert_eval_map_file(struct module *mod,
5719 struct trace_eval_map **start, int len) { }
5720 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5721
5722 static void trace_insert_eval_map(struct module *mod,
5723 struct trace_eval_map **start, int len)
5724 {
5725 struct trace_eval_map **map;
5726
5727 if (len <= 0)
5728 return;
5729
5730 map = start;
5731
5732 trace_event_eval_update(map, len);
5733
5734 trace_insert_eval_map_file(mod, start, len);
5735 }
5736
5737 static ssize_t
5738 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5739 size_t cnt, loff_t *ppos)
5740 {
5741 struct trace_array *tr = filp->private_data;
5742 char buf[MAX_TRACER_SIZE+2];
5743 int r;
5744
5745 mutex_lock(&trace_types_lock);
5746 r = sprintf(buf, "%s\n", tr->current_trace->name);
5747 mutex_unlock(&trace_types_lock);
5748
5749 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5750 }
5751
5752 int tracer_init(struct tracer *t, struct trace_array *tr)
5753 {
5754 tracing_reset_online_cpus(&tr->array_buffer);
5755 return t->init(tr);
5756 }
5757
5758 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5759 {
5760 int cpu;
5761
5762 for_each_tracing_cpu(cpu)
5763 per_cpu_ptr(buf->data, cpu)->entries = val;
5764 }
5765
5766 #ifdef CONFIG_TRACER_MAX_TRACE
5767 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5768 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5769 struct array_buffer *size_buf, int cpu_id)
5770 {
5771 int cpu, ret = 0;
5772
5773 if (cpu_id == RING_BUFFER_ALL_CPUS) {
5774 for_each_tracing_cpu(cpu) {
5775 ret = ring_buffer_resize(trace_buf->buffer,
5776 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5777 if (ret < 0)
5778 break;
5779 per_cpu_ptr(trace_buf->data, cpu)->entries =
5780 per_cpu_ptr(size_buf->data, cpu)->entries;
5781 }
5782 } else {
5783 ret = ring_buffer_resize(trace_buf->buffer,
5784 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5785 if (ret == 0)
5786 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5787 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5788 }
5789
5790 return ret;
5791 }
5792 #endif /* CONFIG_TRACER_MAX_TRACE */
5793
5794 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5795 unsigned long size, int cpu)
5796 {
5797 int ret;
5798
5799 /*
5800 * If kernel or user changes the size of the ring buffer
5801 * we use the size that was given, and we can forget about
5802 * expanding it later.
5803 */
5804 ring_buffer_expanded = true;
5805
5806 /* May be called before buffers are initialized */
5807 if (!tr->array_buffer.buffer)
5808 return 0;
5809
5810 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5811 if (ret < 0)
5812 return ret;
5813
5814 #ifdef CONFIG_TRACER_MAX_TRACE
5815 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5816 !tr->current_trace->use_max_tr)
5817 goto out;
5818
5819 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5820 if (ret < 0) {
5821 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5822 &tr->array_buffer, cpu);
5823 if (r < 0) {
5824 /*
5825 * AARGH! We are left with different
5826 * size max buffer!!!!
5827 * The max buffer is our "snapshot" buffer.
5828 * When a tracer needs a snapshot (one of the
5829 * latency tracers), it swaps the max buffer
5830 * with the saved snap shot. We succeeded to
5831 * update the size of the main buffer, but failed to
5832 * update the size of the max buffer. But when we tried
5833 * to reset the main buffer to the original size, we
5834 * failed there too. This is very unlikely to
5835 * happen, but if it does, warn and kill all
5836 * tracing.
5837 */
5838 WARN_ON(1);
5839 tracing_disabled = 1;
5840 }
5841 return ret;
5842 }
5843
5844 if (cpu == RING_BUFFER_ALL_CPUS)
5845 set_buffer_entries(&tr->max_buffer, size);
5846 else
5847 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5848
5849 out:
5850 #endif /* CONFIG_TRACER_MAX_TRACE */
5851
5852 if (cpu == RING_BUFFER_ALL_CPUS)
5853 set_buffer_entries(&tr->array_buffer, size);
5854 else
5855 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5856
5857 return ret;
5858 }
5859
5860 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5861 unsigned long size, int cpu_id)
5862 {
5863 int ret = size;
5864
5865 mutex_lock(&trace_types_lock);
5866
5867 if (cpu_id != RING_BUFFER_ALL_CPUS) {
5868 /* make sure this cpu is enabled in the mask */
5869 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5870 ret = -EINVAL;
5871 goto out;
5872 }
5873 }
5874
5875 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5876 if (ret < 0)
5877 ret = -ENOMEM;
5878
5879 out:
5880 mutex_unlock(&trace_types_lock);
5881
5882 return ret;
5883 }
5884
5885
5886 /**
5887 * tracing_update_buffers - used by tracing facility to expand ring buffers
5888 *
5889 * To save memory when tracing is never used on a system that has it
5890 * configured in, the ring buffers are set to a minimum size. But once
5891 * a user starts to use the tracing facility, they need to grow
5892 * to their default size.
5893 *
5894 * This function is to be called when a tracer is about to be used.
5895 */
5896 int tracing_update_buffers(void)
5897 {
5898 int ret = 0;
5899
5900 mutex_lock(&trace_types_lock);
5901 if (!ring_buffer_expanded)
5902 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5903 RING_BUFFER_ALL_CPUS);
5904 mutex_unlock(&trace_types_lock);
5905
5906 return ret;
5907 }
5908
5909 struct trace_option_dentry;
5910
5911 static void
5912 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5913
5914 /*
5915 * Used to clear out the tracer before deletion of an instance.
5916 * Must have trace_types_lock held.
5917 */
5918 static void tracing_set_nop(struct trace_array *tr)
5919 {
5920 if (tr->current_trace == &nop_trace)
5921 return;
5922
5923 tr->current_trace->enabled--;
5924
5925 if (tr->current_trace->reset)
5926 tr->current_trace->reset(tr);
5927
5928 tr->current_trace = &nop_trace;
5929 }
5930
5931 static bool tracer_options_updated;
5932
5933 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5934 {
5935 /* Only enable if the directory has been created already. */
5936 if (!tr->dir)
5937 return;
5938
5939 /* Only create trace option files after update_tracer_options has finished */
5940 if (!tracer_options_updated)
5941 return;
5942
5943 create_trace_option_files(tr, t);
5944 }
5945
5946 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5947 {
5948 struct tracer *t;
5949 #ifdef CONFIG_TRACER_MAX_TRACE
5950 bool had_max_tr;
5951 #endif
5952 int ret = 0;
5953
5954 mutex_lock(&trace_types_lock);
5955
5956 if (!ring_buffer_expanded) {
5957 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5958 RING_BUFFER_ALL_CPUS);
5959 if (ret < 0)
5960 goto out;
5961 ret = 0;
5962 }
5963
5964 for (t = trace_types; t; t = t->next) {
5965 if (strcmp(t->name, buf) == 0)
5966 break;
5967 }
5968 if (!t) {
5969 ret = -EINVAL;
5970 goto out;
5971 }
5972 if (t == tr->current_trace)
5973 goto out;
5974
5975 #ifdef CONFIG_TRACER_SNAPSHOT
5976 if (t->use_max_tr) {
5977 local_irq_disable();
5978 arch_spin_lock(&tr->max_lock);
5979 if (tr->cond_snapshot)
5980 ret = -EBUSY;
5981 arch_spin_unlock(&tr->max_lock);
5982 local_irq_enable();
5983 if (ret)
5984 goto out;
5985 }
5986 #endif
5987 /* Some tracers won't work when enabled from the kernel command line */
5988 if (system_state < SYSTEM_RUNNING && t->noboot) {
5989 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5990 t->name);
5991 goto out;
5992 }
5993
5994 /* Some tracers are only allowed for the top level buffer */
5995 if (!trace_ok_for_array(t, tr)) {
5996 ret = -EINVAL;
5997 goto out;
5998 }
5999
6000 /* If trace pipe files are being read, we can't change the tracer */
6001 if (tr->trace_ref) {
6002 ret = -EBUSY;
6003 goto out;
6004 }
6005
6006 trace_branch_disable();
6007
6008 tr->current_trace->enabled--;
6009
6010 if (tr->current_trace->reset)
6011 tr->current_trace->reset(tr);
6012
6013 #ifdef CONFIG_TRACER_MAX_TRACE
6014 had_max_tr = tr->current_trace->use_max_tr;
6015
6016 /* Current trace needs to be nop_trace before synchronize_rcu */
6017 tr->current_trace = &nop_trace;
6018
6019 if (had_max_tr && !t->use_max_tr) {
6020 /*
6021 * We need to make sure that the update_max_tr sees that
6022 * current_trace changed to nop_trace to keep it from
6023 * swapping the buffers after we resize it.
6024 * The update_max_tr is called with interrupts disabled,
6025 * so a synchronize_rcu() is sufficient.
6026 */
6027 synchronize_rcu();
6028 free_snapshot(tr);
6029 }
6030
6031 if (t->use_max_tr && !tr->allocated_snapshot) {
6032 ret = tracing_alloc_snapshot_instance(tr);
6033 if (ret < 0)
6034 goto out;
6035 }
6036 #else
6037 tr->current_trace = &nop_trace;
6038 #endif
6039
6040 if (t->init) {
6041 ret = tracer_init(t, tr);
6042 if (ret)
6043 goto out;
6044 }
6045
6046 tr->current_trace = t;
6047 tr->current_trace->enabled++;
6048 trace_branch_enable(tr);
6049 out:
6050 mutex_unlock(&trace_types_lock);
6051
6052 return ret;
6053 }
6054
6055 static ssize_t
6056 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6057 size_t cnt, loff_t *ppos)
6058 {
6059 struct trace_array *tr = filp->private_data;
6060 char buf[MAX_TRACER_SIZE+1];
6061 int i;
6062 size_t ret;
6063 int err;
6064
6065 ret = cnt;
6066
6067 if (cnt > MAX_TRACER_SIZE)
6068 cnt = MAX_TRACER_SIZE;
6069
6070 if (copy_from_user(buf, ubuf, cnt))
6071 return -EFAULT;
6072
6073 buf[cnt] = 0;
6074
6075 /* strip ending whitespace. */
6076 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6077 buf[i] = 0;
6078
6079 err = tracing_set_tracer(tr, buf);
6080 if (err)
6081 return err;
6082
6083 *ppos += ret;
6084
6085 return ret;
6086 }
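/*
 * Sketch of expected usage (assuming tracefs is mounted at
 * /sys/kernel/tracing): the current_tracer file is backed by the
 * read/write handlers above. The "nop" tracer is always registered,
 * so the following should work on any kernel with tracing enabled:
 *
 *   cat /sys/kernel/tracing/current_tracer
 *   echo nop > /sys/kernel/tracing/current_tracer
 */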
6087
6088 static ssize_t
6089 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6090 size_t cnt, loff_t *ppos)
6091 {
6092 char buf[64];
6093 int r;
6094
6095 r = snprintf(buf, sizeof(buf), "%ld\n",
6096 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6097 if (r > sizeof(buf))
6098 r = sizeof(buf);
6099 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6100 }
6101
6102 static ssize_t
6103 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6104 size_t cnt, loff_t *ppos)
6105 {
6106 unsigned long val;
6107 int ret;
6108
6109 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6110 if (ret)
6111 return ret;
6112
6113 *ptr = val * 1000;
6114
6115 return cnt;
6116 }
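/*
 * Note on units: the helpers above expose the value in microseconds but
 * store it in nanoseconds. For example, writing "100" to a file backed
 * by tracing_nsecs_write() stores 100 * 1000 = 100000 ns, and
 * tracing_nsecs_read() converts it back to "100" on read.
 */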
6117
6118 static ssize_t
6119 tracing_thresh_read(struct file *filp, char __user *ubuf,
6120 size_t cnt, loff_t *ppos)
6121 {
6122 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6123 }
6124
6125 static ssize_t
6126 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6127 size_t cnt, loff_t *ppos)
6128 {
6129 struct trace_array *tr = filp->private_data;
6130 int ret;
6131
6132 mutex_lock(&trace_types_lock);
6133 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6134 if (ret < 0)
6135 goto out;
6136
6137 if (tr->current_trace->update_thresh) {
6138 ret = tr->current_trace->update_thresh(tr);
6139 if (ret < 0)
6140 goto out;
6141 }
6142
6143 ret = cnt;
6144 out:
6145 mutex_unlock(&trace_types_lock);
6146
6147 return ret;
6148 }
6149
6150 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6151
6152 static ssize_t
6153 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6154 size_t cnt, loff_t *ppos)
6155 {
6156 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6157 }
6158
6159 static ssize_t
6160 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6161 size_t cnt, loff_t *ppos)
6162 {
6163 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6164 }
6165
6166 #endif
6167
6168 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6169 {
6170 struct trace_array *tr = inode->i_private;
6171 struct trace_iterator *iter;
6172 int ret;
6173
6174 ret = tracing_check_open_get_tr(tr);
6175 if (ret)
6176 return ret;
6177
6178 mutex_lock(&trace_types_lock);
6179
6180 /* create a buffer to store the information to pass to userspace */
6181 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6182 if (!iter) {
6183 ret = -ENOMEM;
6184 __trace_array_put(tr);
6185 goto out;
6186 }
6187
6188 trace_seq_init(&iter->seq);
6189 iter->trace = tr->current_trace;
6190
6191 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6192 ret = -ENOMEM;
6193 goto fail;
6194 }
6195
6196 /* trace pipe does not show start of buffer */
6197 cpumask_setall(iter->started);
6198
6199 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6200 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6201
6202 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6203 if (trace_clocks[tr->clock_id].in_ns)
6204 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6205
6206 iter->tr = tr;
6207 iter->array_buffer = &tr->array_buffer;
6208 iter->cpu_file = tracing_get_cpu(inode);
6209 mutex_init(&iter->mutex);
6210 filp->private_data = iter;
6211
6212 if (iter->trace->pipe_open)
6213 iter->trace->pipe_open(iter);
6214
6215 nonseekable_open(inode, filp);
6216
6217 tr->trace_ref++;
6218 out:
6219 mutex_unlock(&trace_types_lock);
6220 return ret;
6221
6222 fail:
6223 kfree(iter);
6224 __trace_array_put(tr);
6225 mutex_unlock(&trace_types_lock);
6226 return ret;
6227 }
6228
6229 static int tracing_release_pipe(struct inode *inode, struct file *file)
6230 {
6231 struct trace_iterator *iter = file->private_data;
6232 struct trace_array *tr = inode->i_private;
6233
6234 mutex_lock(&trace_types_lock);
6235
6236 tr->trace_ref--;
6237
6238 if (iter->trace->pipe_close)
6239 iter->trace->pipe_close(iter);
6240
6241 mutex_unlock(&trace_types_lock);
6242
6243 free_cpumask_var(iter->started);
6244 mutex_destroy(&iter->mutex);
6245 kfree(iter);
6246
6247 trace_array_put(tr);
6248
6249 return 0;
6250 }
6251
6252 static __poll_t
6253 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6254 {
6255 struct trace_array *tr = iter->tr;
6256
6257 /* Iterators are static, they should be filled or empty */
6258 if (trace_buffer_iter(iter, iter->cpu_file))
6259 return EPOLLIN | EPOLLRDNORM;
6260
6261 if (tr->trace_flags & TRACE_ITER_BLOCK)
6262 /*
6263 * Always select as readable when in blocking mode
6264 */
6265 return EPOLLIN | EPOLLRDNORM;
6266 else
6267 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6268 filp, poll_table, iter->tr->buffer_percent);
6269 }
6270
6271 static __poll_t
6272 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6273 {
6274 struct trace_iterator *iter = filp->private_data;
6275
6276 return trace_poll(iter, filp, poll_table);
6277 }
6278
6279 /* Must be called with iter->mutex held. */
6280 static int tracing_wait_pipe(struct file *filp)
6281 {
6282 struct trace_iterator *iter = filp->private_data;
6283 int ret;
6284
6285 while (trace_empty(iter)) {
6286
6287 if ((filp->f_flags & O_NONBLOCK)) {
6288 return -EAGAIN;
6289 }
6290
6291 /*
6292 * We block until we read something and tracing is disabled.
6293 * We still block if tracing is disabled, but we have never
6294 * read anything. This allows a user to cat this file, and
6295 * then enable tracing. But after we have read something,
6296 * we give an EOF when tracing is again disabled.
6297 *
6298 * iter->pos will be 0 if we haven't read anything.
6299 */
6300 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6301 break;
6302
6303 mutex_unlock(&iter->mutex);
6304
6305 ret = wait_on_pipe(iter, 0);
6306
6307 mutex_lock(&iter->mutex);
6308
6309 if (ret)
6310 return ret;
6311 }
6312
6313 return 1;
6314 }
6315
6316 /*
6317 * Consumer reader.
6318 */
6319 static ssize_t
6320 tracing_read_pipe(struct file *filp, char __user *ubuf,
6321 size_t cnt, loff_t *ppos)
6322 {
6323 struct trace_iterator *iter = filp->private_data;
6324 ssize_t sret;
6325
6326 /*
6327 * Avoid more than one consumer on a single file descriptor
6328 * This is just a matter of traces coherency, the ring buffer itself
6329 * is protected.
6330 */
6331 mutex_lock(&iter->mutex);
6332
6333 /* return any leftover data */
6334 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6335 if (sret != -EBUSY)
6336 goto out;
6337
6338 trace_seq_init(&iter->seq);
6339
6340 if (iter->trace->read) {
6341 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6342 if (sret)
6343 goto out;
6344 }
6345
6346 waitagain:
6347 sret = tracing_wait_pipe(filp);
6348 if (sret <= 0)
6349 goto out;
6350
6351 /* stop when tracing is finished */
6352 if (trace_empty(iter)) {
6353 sret = 0;
6354 goto out;
6355 }
6356
6357 if (cnt >= PAGE_SIZE)
6358 cnt = PAGE_SIZE - 1;
6359
6360 /* reset all but tr, trace, and overruns */
6361 memset(&iter->seq, 0,
6362 sizeof(struct trace_iterator) -
6363 offsetof(struct trace_iterator, seq));
6364 cpumask_clear(iter->started);
6365 trace_seq_init(&iter->seq);
6366 iter->pos = -1;
6367
6368 trace_event_read_lock();
6369 trace_access_lock(iter->cpu_file);
6370 while (trace_find_next_entry_inc(iter) != NULL) {
6371 enum print_line_t ret;
6372 int save_len = iter->seq.seq.len;
6373
6374 ret = print_trace_line(iter);
6375 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6376 /*
6377 * If one print_trace_line() fills the entire trace_seq in one shot,
6378 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6379 * In this case, we need to consume it, otherwise the loop will pick
6380 * up this event again next time, resulting in an infinite loop.
6381 */
6382 if (save_len == 0) {
6383 iter->seq.full = 0;
6384 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6385 trace_consume(iter);
6386 break;
6387 }
6388
6389 /* In other cases, don't print partial lines */
6390 iter->seq.seq.len = save_len;
6391 break;
6392 }
6393 if (ret != TRACE_TYPE_NO_CONSUME)
6394 trace_consume(iter);
6395
6396 if (trace_seq_used(&iter->seq) >= cnt)
6397 break;
6398
6399 /*
6400 * Setting the full flag means we reached the trace_seq buffer
6401 * size and we should have left via the partial-output condition above.
6402 * One of the trace_seq_* functions is not used properly.
6403 */
6404 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6405 iter->ent->type);
6406 }
6407 trace_access_unlock(iter->cpu_file);
6408 trace_event_read_unlock();
6409
6410 /* Now copy what we have to the user */
6411 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6412 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6413 trace_seq_init(&iter->seq);
6414
6415 /*
6416 * If there was nothing to send to user, in spite of consuming trace
6417 * entries, go back to wait for more entries.
6418 */
6419 if (sret == -EBUSY)
6420 goto waitagain;
6421
6422 out:
6423 mutex_unlock(&iter->mutex);
6424
6425 return sret;
6426 }
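/*
 * Sketch of expected usage (assuming tracefs is mounted at
 * /sys/kernel/tracing): trace_pipe is a consuming, blocking reader built
 * on the functions above. Entries read from it are removed from the ring
 * buffer, and a read blocks until data arrives unless the file was opened
 * with O_NONBLOCK:
 *
 *   cat /sys/kernel/tracing/trace_pipe
 */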
6427
6428 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6429 unsigned int idx)
6430 {
6431 __free_page(spd->pages[idx]);
6432 }
6433
6434 static size_t
6435 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6436 {
6437 size_t count;
6438 int save_len;
6439 int ret;
6440
6441 /* Seq buffer is page-sized, exactly what we need. */
6442 for (;;) {
6443 save_len = iter->seq.seq.len;
6444 ret = print_trace_line(iter);
6445
6446 if (trace_seq_has_overflowed(&iter->seq)) {
6447 iter->seq.seq.len = save_len;
6448 break;
6449 }
6450
6451 /*
6452 * This should not be hit, because it should only
6453 * be set if the iter->seq overflowed. But check it
6454 * anyway to be safe.
6455 */
6456 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6457 iter->seq.seq.len = save_len;
6458 break;
6459 }
6460
6461 count = trace_seq_used(&iter->seq) - save_len;
6462 if (rem < count) {
6463 rem = 0;
6464 iter->seq.seq.len = save_len;
6465 break;
6466 }
6467
6468 if (ret != TRACE_TYPE_NO_CONSUME)
6469 trace_consume(iter);
6470 rem -= count;
6471 if (!trace_find_next_entry_inc(iter)) {
6472 rem = 0;
6473 iter->ent = NULL;
6474 break;
6475 }
6476 }
6477
6478 return rem;
6479 }
6480
6481 static ssize_t tracing_splice_read_pipe(struct file *filp,
6482 loff_t *ppos,
6483 struct pipe_inode_info *pipe,
6484 size_t len,
6485 unsigned int flags)
6486 {
6487 struct page *pages_def[PIPE_DEF_BUFFERS];
6488 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6489 struct trace_iterator *iter = filp->private_data;
6490 struct splice_pipe_desc spd = {
6491 .pages = pages_def,
6492 .partial = partial_def,
6493 .nr_pages = 0, /* This gets updated below. */
6494 .nr_pages_max = PIPE_DEF_BUFFERS,
6495 .ops = &default_pipe_buf_ops,
6496 .spd_release = tracing_spd_release_pipe,
6497 };
6498 ssize_t ret;
6499 size_t rem;
6500 unsigned int i;
6501
6502 if (splice_grow_spd(pipe, &spd))
6503 return -ENOMEM;
6504
6505 mutex_lock(&iter->mutex);
6506
6507 if (iter->trace->splice_read) {
6508 ret = iter->trace->splice_read(iter, filp,
6509 ppos, pipe, len, flags);
6510 if (ret)
6511 goto out_err;
6512 }
6513
6514 ret = tracing_wait_pipe(filp);
6515 if (ret <= 0)
6516 goto out_err;
6517
6518 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6519 ret = -EFAULT;
6520 goto out_err;
6521 }
6522
6523 trace_event_read_lock();
6524 trace_access_lock(iter->cpu_file);
6525
6526 /* Fill as many pages as possible. */
6527 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6528 spd.pages[i] = alloc_page(GFP_KERNEL);
6529 if (!spd.pages[i])
6530 break;
6531
6532 rem = tracing_fill_pipe_page(rem, iter);
6533
6534 /* Copy the data into the page, so we can start over. */
6535 ret = trace_seq_to_buffer(&iter->seq,
6536 page_address(spd.pages[i]),
6537 trace_seq_used(&iter->seq));
6538 if (ret < 0) {
6539 __free_page(spd.pages[i]);
6540 break;
6541 }
6542 spd.partial[i].offset = 0;
6543 spd.partial[i].len = trace_seq_used(&iter->seq);
6544
6545 trace_seq_init(&iter->seq);
6546 }
6547
6548 trace_access_unlock(iter->cpu_file);
6549 trace_event_read_unlock();
6550 mutex_unlock(&iter->mutex);
6551
6552 spd.nr_pages = i;
6553
6554 if (i)
6555 ret = splice_to_pipe(pipe, &spd);
6556 else
6557 ret = 0;
6558 out:
6559 splice_shrink_spd(&spd);
6560 return ret;
6561
6562 out_err:
6563 mutex_unlock(&iter->mutex);
6564 goto out;
6565 }
6566
6567 static ssize_t
6568 tracing_entries_read(struct file *filp, char __user *ubuf,
6569 size_t cnt, loff_t *ppos)
6570 {
6571 struct inode *inode = file_inode(filp);
6572 struct trace_array *tr = inode->i_private;
6573 int cpu = tracing_get_cpu(inode);
6574 char buf[64];
6575 int r = 0;
6576 ssize_t ret;
6577
6578 mutex_lock(&trace_types_lock);
6579
6580 if (cpu == RING_BUFFER_ALL_CPUS) {
6581 int cpu, buf_size_same;
6582 unsigned long size;
6583
6584 size = 0;
6585 buf_size_same = 1;
6586 /* check if all cpu sizes are same */
6587 for_each_tracing_cpu(cpu) {
6588 /* fill in the size from first enabled cpu */
6589 if (size == 0)
6590 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6591 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6592 buf_size_same = 0;
6593 break;
6594 }
6595 }
6596
6597 if (buf_size_same) {
6598 if (!ring_buffer_expanded)
6599 r = sprintf(buf, "%lu (expanded: %lu)\n",
6600 size >> 10,
6601 trace_buf_size >> 10);
6602 else
6603 r = sprintf(buf, "%lu\n", size >> 10);
6604 } else
6605 r = sprintf(buf, "X\n");
6606 } else
6607 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6608
6609 mutex_unlock(&trace_types_lock);
6610
6611 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6612 return ret;
6613 }
6614
6615 static ssize_t
6616 tracing_entries_write(struct file *filp, const char __user *ubuf,
6617 size_t cnt, loff_t *ppos)
6618 {
6619 struct inode *inode = file_inode(filp);
6620 struct trace_array *tr = inode->i_private;
6621 unsigned long val;
6622 int ret;
6623
6624 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6625 if (ret)
6626 return ret;
6627
6628 /* must have at least 1 entry */
6629 if (!val)
6630 return -EINVAL;
6631
6632 /* value is in KB */
6633 val <<= 10;
6634 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6635 if (ret < 0)
6636 return ret;
6637
6638 *ppos += cnt;
6639
6640 return cnt;
6641 }
6642
6643 static ssize_t
6644 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6645 size_t cnt, loff_t *ppos)
6646 {
6647 struct trace_array *tr = filp->private_data;
6648 char buf[64];
6649 int r, cpu;
6650 unsigned long size = 0, expanded_size = 0;
6651
6652 mutex_lock(&trace_types_lock);
6653 for_each_tracing_cpu(cpu) {
6654 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6655 if (!ring_buffer_expanded)
6656 expanded_size += trace_buf_size >> 10;
6657 }
6658 if (ring_buffer_expanded)
6659 r = sprintf(buf, "%lu\n", size);
6660 else
6661 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6662 mutex_unlock(&trace_types_lock);
6663
6664 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6665 }
6666
6667 static ssize_t
6668 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6669 size_t cnt, loff_t *ppos)
6670 {
6671 /*
6672 * There is no need to read what the user has written; this function
6673 * just makes sure that there is no error when "echo" is used.
6674 */
6675
6676 *ppos += cnt;
6677
6678 return cnt;
6679 }
6680
6681 static int
6682 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6683 {
6684 struct trace_array *tr = inode->i_private;
6685
6686 /* disable tracing ? */
6687 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6688 tracer_tracing_off(tr);
6689 /* resize the ring buffer to 0 */
6690 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6691
6692 trace_array_put(tr);
6693
6694 return 0;
6695 }
6696
6697 static ssize_t
6698 tracing_mark_write(struct file *filp, const char __user *ubuf,
6699 size_t cnt, loff_t *fpos)
6700 {
6701 struct trace_array *tr = filp->private_data;
6702 struct ring_buffer_event *event;
6703 enum event_trigger_type tt = ETT_NONE;
6704 struct trace_buffer *buffer;
6705 struct print_entry *entry;
6706 unsigned long irq_flags;
6707 ssize_t written;
6708 int size;
6709 int len;
6710
6711 /* Used in tracing_mark_raw_write() as well */
6712 #define FAULTED_STR "<faulted>"
6713 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6714
6715 if (tracing_disabled)
6716 return -EINVAL;
6717
6718 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6719 return -EINVAL;
6720
6721 if (cnt > TRACE_BUF_SIZE)
6722 cnt = TRACE_BUF_SIZE;
6723
6724 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6725
6726 local_save_flags(irq_flags);
6727 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6728
6729 /* If less than "<faulted>", then make sure we can still add that */
6730 if (cnt < FAULTED_SIZE)
6731 size += FAULTED_SIZE - cnt;
6732
6733 buffer = tr->array_buffer.buffer;
6734 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6735 irq_flags, preempt_count());
6736 if (unlikely(!event))
6737 /* Ring buffer disabled, return as if not open for write */
6738 return -EBADF;
6739
6740 entry = ring_buffer_event_data(event);
6741 entry->ip = _THIS_IP_;
6742
6743 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6744 if (len) {
6745 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6746 cnt = FAULTED_SIZE;
6747 written = -EFAULT;
6748 } else
6749 written = cnt;
6750
6751 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6752 /* do not add \n before testing triggers, but add \0 */
6753 entry->buf[cnt] = '\0';
6754 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6755 }
6756
6757 if (entry->buf[cnt - 1] != '\n') {
6758 entry->buf[cnt] = '\n';
6759 entry->buf[cnt + 1] = '\0';
6760 } else
6761 entry->buf[cnt] = '\0';
6762
6763 if (static_branch_unlikely(&trace_marker_exports_enabled))
6764 ftrace_exports(event, TRACE_EXPORT_MARKER);
6765 __buffer_unlock_commit(buffer, event);
6766
6767 if (tt)
6768 event_triggers_post_call(tr->trace_marker_file, tt);
6769
6770 if (written > 0)
6771 *fpos += written;
6772
6773 return written;
6774 }
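/*
 * Sketch of expected usage (assuming tracefs is mounted at
 * /sys/kernel/tracing): writes to trace_marker land in the ring buffer
 * as TRACE_PRINT entries via tracing_mark_write() above, so user space
 * can annotate a trace with something like:
 *
 *   echo "hello from user space" > /sys/kernel/tracing/trace_marker
 */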
6775
6776 /* Limit it for now to 3K (including tag) */
6777 #define RAW_DATA_MAX_SIZE (1024*3)
6778
6779 static ssize_t
6780 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6781 size_t cnt, loff_t *fpos)
6782 {
6783 struct trace_array *tr = filp->private_data;
6784 struct ring_buffer_event *event;
6785 struct trace_buffer *buffer;
6786 struct raw_data_entry *entry;
6787 unsigned long irq_flags;
6788 ssize_t written;
6789 int size;
6790 int len;
6791
6792 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6793
6794 if (tracing_disabled)
6795 return -EINVAL;
6796
6797 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6798 return -EINVAL;
6799
6800 /* The marker must at least have a tag id */
6801 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6802 return -EINVAL;
6803
6804 if (cnt > TRACE_BUF_SIZE)
6805 cnt = TRACE_BUF_SIZE;
6806
6807 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6808
6809 local_save_flags(irq_flags);
6810 size = sizeof(*entry) + cnt;
6811 if (cnt < FAULT_SIZE_ID)
6812 size += FAULT_SIZE_ID - cnt;
6813
6814 buffer = tr->array_buffer.buffer;
6815 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6816 irq_flags, preempt_count());
6817 if (!event)
6818 /* Ring buffer disabled, return as if not open for write */
6819 return -EBADF;
6820
6821 entry = ring_buffer_event_data(event);
6822
6823 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6824 if (len) {
6825 entry->id = -1;
6826 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6827 written = -EFAULT;
6828 } else
6829 written = cnt;
6830
6831 __buffer_unlock_commit(buffer, event);
6832
6833 if (written > 0)
6834 *fpos += written;
6835
6836 return written;
6837 }
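/*
 * Note on the raw marker format: tracing_mark_raw_write() above (typically
 * exposed as the trace_marker_raw file) expects the payload to start with
 * a binary tag id of sizeof(unsigned int) bytes, followed by the raw data,
 * with a total size between sizeof(unsigned int) and RAW_DATA_MAX_SIZE.
 */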
6838
6839 static int tracing_clock_show(struct seq_file *m, void *v)
6840 {
6841 struct trace_array *tr = m->private;
6842 int i;
6843
6844 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6845 seq_printf(m,
6846 "%s%s%s%s", i ? " " : "",
6847 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6848 i == tr->clock_id ? "]" : "");
6849 seq_putc(m, '\n');
6850
6851 return 0;
6852 }
6853
6854 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6855 {
6856 int i;
6857
6858 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6859 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6860 break;
6861 }
6862 if (i == ARRAY_SIZE(trace_clocks))
6863 return -EINVAL;
6864
6865 mutex_lock(&trace_types_lock);
6866
6867 tr->clock_id = i;
6868
6869 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6870
6871 /*
6872 * New clock may not be consistent with the previous clock.
6873 * Reset the buffer so that it doesn't have incomparable timestamps.
6874 */
6875 tracing_reset_online_cpus(&tr->array_buffer);
6876
6877 #ifdef CONFIG_TRACER_MAX_TRACE
6878 if (tr->max_buffer.buffer)
6879 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6880 tracing_reset_online_cpus(&tr->max_buffer);
6881 #endif
6882
6883 mutex_unlock(&trace_types_lock);
6884
6885 return 0;
6886 }
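/*
 * Sketch of expected usage (assuming tracefs is mounted at
 * /sys/kernel/tracing): reading trace_clock lists the available clocks
 * with the active one in brackets (see tracing_clock_show() above), and
 * writing a listed name such as "global" selects it. Note that
 * tracing_set_clock() resets the ring buffer, since timestamps from
 * different clocks are not comparable:
 *
 *   cat /sys/kernel/tracing/trace_clock
 *   echo global > /sys/kernel/tracing/trace_clock
 */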
6887
6888 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6889 size_t cnt, loff_t *fpos)
6890 {
6891 struct seq_file *m = filp->private_data;
6892 struct trace_array *tr = m->private;
6893 char buf[64];
6894 const char *clockstr;
6895 int ret;
6896
6897 if (cnt >= sizeof(buf))
6898 return -EINVAL;
6899
6900 if (copy_from_user(buf, ubuf, cnt))
6901 return -EFAULT;
6902
6903 buf[cnt] = 0;
6904
6905 clockstr = strstrip(buf);
6906
6907 ret = tracing_set_clock(tr, clockstr);
6908 if (ret)
6909 return ret;
6910
6911 *fpos += cnt;
6912
6913 return cnt;
6914 }
6915
6916 static int tracing_clock_open(struct inode *inode, struct file *file)
6917 {
6918 struct trace_array *tr = inode->i_private;
6919 int ret;
6920
6921 ret = tracing_check_open_get_tr(tr);
6922 if (ret)
6923 return ret;
6924
6925 ret = single_open(file, tracing_clock_show, inode->i_private);
6926 if (ret < 0)
6927 trace_array_put(tr);
6928
6929 return ret;
6930 }
6931
6932 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6933 {
6934 struct trace_array *tr = m->private;
6935
6936 mutex_lock(&trace_types_lock);
6937
6938 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6939 seq_puts(m, "delta [absolute]\n");
6940 else
6941 seq_puts(m, "[delta] absolute\n");
6942
6943 mutex_unlock(&trace_types_lock);
6944
6945 return 0;
6946 }
6947
6948 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6949 {
6950 struct trace_array *tr = inode->i_private;
6951 int ret;
6952
6953 ret = tracing_check_open_get_tr(tr);
6954 if (ret)
6955 return ret;
6956
6957 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6958 if (ret < 0)
6959 trace_array_put(tr);
6960
6961 return ret;
6962 }
6963
6964 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6965 {
6966 int ret = 0;
6967
6968 mutex_lock(&trace_types_lock);
6969
6970 if (abs && tr->time_stamp_abs_ref++)
6971 goto out;
6972
6973 if (!abs) {
6974 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6975 ret = -EINVAL;
6976 goto out;
6977 }
6978
6979 if (--tr->time_stamp_abs_ref)
6980 goto out;
6981 }
6982
6983 ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6984
6985 #ifdef CONFIG_TRACER_MAX_TRACE
6986 if (tr->max_buffer.buffer)
6987 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6988 #endif
6989 out:
6990 mutex_unlock(&trace_types_lock);
6991
6992 return ret;
6993 }
6994
6995 struct ftrace_buffer_info {
6996 struct trace_iterator iter;
6997 void *spare;
6998 unsigned int spare_cpu;
6999 unsigned int read;
7000 };
7001
7002 #ifdef CONFIG_TRACER_SNAPSHOT
7003 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7004 {
7005 struct trace_array *tr = inode->i_private;
7006 struct trace_iterator *iter;
7007 struct seq_file *m;
7008 int ret;
7009
7010 ret = tracing_check_open_get_tr(tr);
7011 if (ret)
7012 return ret;
7013
7014 if (file->f_mode & FMODE_READ) {
7015 iter = __tracing_open(inode, file, true);
7016 if (IS_ERR(iter))
7017 ret = PTR_ERR(iter);
7018 } else {
7019 /* Writes still need the seq_file to hold the private data */
7020 ret = -ENOMEM;
7021 m = kzalloc(sizeof(*m), GFP_KERNEL);
7022 if (!m)
7023 goto out;
7024 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7025 if (!iter) {
7026 kfree(m);
7027 goto out;
7028 }
7029 ret = 0;
7030
7031 iter->tr = tr;
7032 iter->array_buffer = &tr->max_buffer;
7033 iter->cpu_file = tracing_get_cpu(inode);
7034 m->private = iter;
7035 file->private_data = m;
7036 }
7037 out:
7038 if (ret < 0)
7039 trace_array_put(tr);
7040
7041 return ret;
7042 }
7043
7044 static ssize_t
7045 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7046 loff_t *ppos)
7047 {
7048 struct seq_file *m = filp->private_data;
7049 struct trace_iterator *iter = m->private;
7050 struct trace_array *tr = iter->tr;
7051 unsigned long val;
7052 int ret;
7053
7054 ret = tracing_update_buffers();
7055 if (ret < 0)
7056 return ret;
7057
7058 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7059 if (ret)
7060 return ret;
7061
7062 mutex_lock(&trace_types_lock);
7063
7064 if (tr->current_trace->use_max_tr) {
7065 ret = -EBUSY;
7066 goto out;
7067 }
7068
7069 local_irq_disable();
7070 arch_spin_lock(&tr->max_lock);
7071 if (tr->cond_snapshot)
7072 ret = -EBUSY;
7073 arch_spin_unlock(&tr->max_lock);
7074 local_irq_enable();
7075 if (ret)
7076 goto out;
7077
7078 switch (val) {
7079 case 0:
7080 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7081 ret = -EINVAL;
7082 break;
7083 }
7084 if (tr->allocated_snapshot)
7085 free_snapshot(tr);
7086 break;
7087 case 1:
7088 /* Only allow per-cpu swap if the ring buffer supports it */
7089 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7090 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7091 ret = -EINVAL;
7092 break;
7093 }
7094 #endif
7095 if (tr->allocated_snapshot)
7096 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7097 &tr->array_buffer, iter->cpu_file);
7098 else
7099 ret = tracing_alloc_snapshot_instance(tr);
7100 if (ret < 0)
7101 break;
7102 local_irq_disable();
7103 /* Now, we're going to swap */
7104 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7105 update_max_tr(tr, current, smp_processor_id(), NULL);
7106 else
7107 update_max_tr_single(tr, current, iter->cpu_file);
7108 local_irq_enable();
7109 break;
7110 default:
7111 if (tr->allocated_snapshot) {
7112 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7113 tracing_reset_online_cpus(&tr->max_buffer);
7114 else
7115 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7116 }
7117 break;
7118 }
7119
7120 if (ret >= 0) {
7121 *ppos += cnt;
7122 ret = cnt;
7123 }
7124 out:
7125 mutex_unlock(&trace_types_lock);
7126 return ret;
7127 }
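/*
 * Summary of the snapshot file's write semantics, as implemented by
 * tracing_snapshot_write() above (illustrative shell usage assumes
 * tracefs is mounted at /sys/kernel/tracing):
 *
 *   echo 0 > snapshot   - free the snapshot buffer (all-CPUs file only)
 *   echo 1 > snapshot   - allocate the buffer if needed and swap it with
 *                         the live trace buffer
 *   echo 2 > snapshot   - any other value: clear the snapshot contents
 *                         without freeing the buffer
 */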
7128
7129 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7130 {
7131 struct seq_file *m = file->private_data;
7132 int ret;
7133
7134 ret = tracing_release(inode, file);
7135
7136 if (file->f_mode & FMODE_READ)
7137 return ret;
7138
7139 /* If write only, the seq_file is just a stub */
7140 if (m)
7141 kfree(m->private);
7142 kfree(m);
7143
7144 return 0;
7145 }
7146
7147 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7148 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7149 size_t count, loff_t *ppos);
7150 static int tracing_buffers_release(struct inode *inode, struct file *file);
7151 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7152 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7153
7154 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7155 {
7156 struct ftrace_buffer_info *info;
7157 int ret;
7158
7159 /* The following checks for tracefs lockdown */
7160 ret = tracing_buffers_open(inode, filp);
7161 if (ret < 0)
7162 return ret;
7163
7164 info = filp->private_data;
7165
7166 if (info->iter.trace->use_max_tr) {
7167 tracing_buffers_release(inode, filp);
7168 return -EBUSY;
7169 }
7170
7171 info->iter.snapshot = true;
7172 info->iter.array_buffer = &info->iter.tr->max_buffer;
7173
7174 return ret;
7175 }
7176
7177 #endif /* CONFIG_TRACER_SNAPSHOT */
7178
7179
7180 static const struct file_operations tracing_thresh_fops = {
7181 .open = tracing_open_generic,
7182 .read = tracing_thresh_read,
7183 .write = tracing_thresh_write,
7184 .llseek = generic_file_llseek,
7185 };
7186
7187 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7188 static const struct file_operations tracing_max_lat_fops = {
7189 .open = tracing_open_generic,
7190 .read = tracing_max_lat_read,
7191 .write = tracing_max_lat_write,
7192 .llseek = generic_file_llseek,
7193 };
7194 #endif
7195
7196 static const struct file_operations set_tracer_fops = {
7197 .open = tracing_open_generic,
7198 .read = tracing_set_trace_read,
7199 .write = tracing_set_trace_write,
7200 .llseek = generic_file_llseek,
7201 };
7202
7203 static const struct file_operations tracing_pipe_fops = {
7204 .open = tracing_open_pipe,
7205 .poll = tracing_poll_pipe,
7206 .read = tracing_read_pipe,
7207 .splice_read = tracing_splice_read_pipe,
7208 .release = tracing_release_pipe,
7209 .llseek = no_llseek,
7210 };
7211
7212 static const struct file_operations tracing_entries_fops = {
7213 .open = tracing_open_generic_tr,
7214 .read = tracing_entries_read,
7215 .write = tracing_entries_write,
7216 .llseek = generic_file_llseek,
7217 .release = tracing_release_generic_tr,
7218 };
7219
7220 static const struct file_operations tracing_total_entries_fops = {
7221 .open = tracing_open_generic_tr,
7222 .read = tracing_total_entries_read,
7223 .llseek = generic_file_llseek,
7224 .release = tracing_release_generic_tr,
7225 };
7226
7227 static const struct file_operations tracing_free_buffer_fops = {
7228 .open = tracing_open_generic_tr,
7229 .write = tracing_free_buffer_write,
7230 .release = tracing_free_buffer_release,
7231 };
7232
7233 static const struct file_operations tracing_mark_fops = {
7234 .open = tracing_open_generic_tr,
7235 .write = tracing_mark_write,
7236 .llseek = generic_file_llseek,
7237 .release = tracing_release_generic_tr,
7238 };
7239
7240 static const struct file_operations tracing_mark_raw_fops = {
7241 .open = tracing_open_generic_tr,
7242 .write = tracing_mark_raw_write,
7243 .llseek = generic_file_llseek,
7244 .release = tracing_release_generic_tr,
7245 };
7246
7247 static const struct file_operations trace_clock_fops = {
7248 .open = tracing_clock_open,
7249 .read = seq_read,
7250 .llseek = seq_lseek,
7251 .release = tracing_single_release_tr,
7252 .write = tracing_clock_write,
7253 };
7254
7255 static const struct file_operations trace_time_stamp_mode_fops = {
7256 .open = tracing_time_stamp_mode_open,
7257 .read = seq_read,
7258 .llseek = seq_lseek,
7259 .release = tracing_single_release_tr,
7260 };
7261
7262 #ifdef CONFIG_TRACER_SNAPSHOT
7263 static const struct file_operations snapshot_fops = {
7264 .open = tracing_snapshot_open,
7265 .read = seq_read,
7266 .write = tracing_snapshot_write,
7267 .llseek = tracing_lseek,
7268 .release = tracing_snapshot_release,
7269 };
7270
7271 static const struct file_operations snapshot_raw_fops = {
7272 .open = snapshot_raw_open,
7273 .read = tracing_buffers_read,
7274 .release = tracing_buffers_release,
7275 .splice_read = tracing_buffers_splice_read,
7276 .llseek = no_llseek,
7277 };
7278
7279 #endif /* CONFIG_TRACER_SNAPSHOT */
7280
7281 #define TRACING_LOG_ERRS_MAX 8
7282 #define TRACING_LOG_LOC_MAX 128
7283
7284 #define CMD_PREFIX " Command: "
7285
7286 struct err_info {
7287 const char **errs; /* ptr to loc-specific array of err strings */
7288 u8 type; /* index into errs -> specific err string */
7289 u8 pos; /* MAX_FILTER_STR_VAL = 256 */
7290 u64 ts;
7291 };
7292
7293 struct tracing_log_err {
7294 struct list_head list;
7295 struct err_info info;
7296 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7297 char cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7298 };
7299
7300 static DEFINE_MUTEX(tracing_err_log_lock);
7301
7302 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7303 {
7304 struct tracing_log_err *err;
7305
7306 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7307 err = kzalloc(sizeof(*err), GFP_KERNEL);
7308 if (!err)
7309 err = ERR_PTR(-ENOMEM);
7310 else
7311 tr->n_err_log_entries++;
7312
7313 return err;
7314 }
7315
7316 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7317 list_del(&err->list);
7318
7319 return err;
7320 }
7321
7322 /**
7323 * err_pos - find the position of a string within a command for error careting
7324 * @cmd: The tracing command that caused the error
7325 * @str: The string to position the caret at within @cmd
7326 *
7327 * Finds the position of the first occurrence of @str within @cmd. The
7328 * return value can be passed to tracing_log_err() for caret placement
7329 * within @cmd.
7330 *
7331 * Returns the index within @cmd of the first occurrence of @str or 0
7332 * if @str was not found.
7333 */
7334 unsigned int err_pos(char *cmd, const char *str)
7335 {
7336 char *found;
7337
7338 if (WARN_ON(!strlen(cmd)))
7339 return 0;
7340
7341 found = strstr(cmd, str);
7342 if (found)
7343 return found - cmd;
7344
7345 return 0;
7346 }
7347
7348 /**
7349 * tracing_log_err - write an error to the tracing error log
7350 * @tr: The associated trace array for the error (NULL for top level array)
7351 * @loc: A string describing where the error occurred
7352 * @cmd: The tracing command that caused the error
7353 * @errs: The array of loc-specific static error strings
7354 * @type: The index into errs[], which produces the specific static err string
7355 * @pos: The position the caret should be placed in the cmd
7356 *
7357 * Writes an error into tracing/error_log of the form:
7358 *
7359 * <loc>: error: <text>
7360 * Command: <cmd>
7361 * ^
7362 *
7363 * tracing/error_log is a small log file containing the last
7364 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7365 * unless there has been a tracing error, and the error log can be
7366 * cleared and have its memory freed by writing the empty string in
7367 * truncation mode to it i.e. echo > tracing/error_log.
7368 *
7369 * NOTE: the @errs array along with the @type param are used to
7370 * produce a static error string - this string is not copied and saved
7371 * when the error is logged - only a pointer to it is saved. See
7372 * existing callers for examples of how static strings are typically
7373 * defined for use with tracing_log_err().
7374 */
7375 void tracing_log_err(struct trace_array *tr,
7376 const char *loc, const char *cmd,
7377 const char **errs, u8 type, u8 pos)
7378 {
7379 struct tracing_log_err *err;
7380
7381 if (!tr)
7382 tr = &global_trace;
7383
7384 mutex_lock(&tracing_err_log_lock);
7385 err = get_tracing_log_err(tr);
7386 if (PTR_ERR(err) == -ENOMEM) {
7387 mutex_unlock(&tracing_err_log_lock);
7388 return;
7389 }
7390
7391 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7392 snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7393
7394 err->info.errs = errs;
7395 err->info.type = type;
7396 err->info.pos = pos;
7397 err->info.ts = local_clock();
7398
7399 list_add_tail(&err->list, &tr->err_log);
7400 mutex_unlock(&tracing_err_log_lock);
7401 }
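
/*
 * Illustrative sketch only (the names below are made up, not an actual
 * caller): a command parser typically keeps a static array of error
 * strings, passes an index into it as @type, and uses err_pos() to put
 * the caret under the offending token of the command:
 *
 *	static const char *foo_cmd_errs[] = { "Invalid argument", "Duplicate name" };
 *
 *	tracing_log_err(tr, "foo_cmd", buf, foo_cmd_errs,
 *			FOO_ERR_INVALID_ARG, err_pos(buf, "badtok"));
 */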
7402
7403 static void clear_tracing_err_log(struct trace_array *tr)
7404 {
7405 struct tracing_log_err *err, *next;
7406
7407 mutex_lock(&tracing_err_log_lock);
7408 list_for_each_entry_safe(err, next, &tr->err_log, list) {
7409 list_del(&err->list);
7410 kfree(err);
7411 }
7412
7413 tr->n_err_log_entries = 0;
7414 mutex_unlock(&tracing_err_log_lock);
7415 }
7416
7417 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7418 {
7419 struct trace_array *tr = m->private;
7420
7421 mutex_lock(&tracing_err_log_lock);
7422
7423 return seq_list_start(&tr->err_log, *pos);
7424 }
7425
7426 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7427 {
7428 struct trace_array *tr = m->private;
7429
7430 return seq_list_next(v, &tr->err_log, pos);
7431 }
7432
7433 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7434 {
7435 mutex_unlock(&tracing_err_log_lock);
7436 }
7437
7438 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7439 {
7440 u8 i;
7441
7442 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7443 seq_putc(m, ' ');
7444 for (i = 0; i < pos; i++)
7445 seq_putc(m, ' ');
7446 seq_puts(m, "^\n");
7447 }
7448
7449 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7450 {
7451 struct tracing_log_err *err = v;
7452
7453 if (err) {
7454 const char *err_text = err->info.errs[err->info.type];
7455 u64 sec = err->info.ts;
7456 u32 nsec;
7457
7458 nsec = do_div(sec, NSEC_PER_SEC);
7459 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7460 err->loc, err_text);
7461 seq_printf(m, "%s", err->cmd);
7462 tracing_err_log_show_pos(m, err->info.pos);
7463 }
7464
7465 return 0;
7466 }
7467
7468 static const struct seq_operations tracing_err_log_seq_ops = {
7469 .start = tracing_err_log_seq_start,
7470 .next = tracing_err_log_seq_next,
7471 .stop = tracing_err_log_seq_stop,
7472 .show = tracing_err_log_seq_show
7473 };
7474
7475 static int tracing_err_log_open(struct inode *inode, struct file *file)
7476 {
7477 struct trace_array *tr = inode->i_private;
7478 int ret = 0;
7479
7480 ret = tracing_check_open_get_tr(tr);
7481 if (ret)
7482 return ret;
7483
7484 /* If this file was opened for write, then erase contents */
7485 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7486 clear_tracing_err_log(tr);
7487
7488 if (file->f_mode & FMODE_READ) {
7489 ret = seq_open(file, &tracing_err_log_seq_ops);
7490 if (!ret) {
7491 struct seq_file *m = file->private_data;
7492 m->private = tr;
7493 } else {
7494 trace_array_put(tr);
7495 }
7496 }
7497 return ret;
7498 }
7499
7500 static ssize_t tracing_err_log_write(struct file *file,
7501 const char __user *buffer,
7502 size_t count, loff_t *ppos)
7503 {
7504 return count;
7505 }
7506
7507 static int tracing_err_log_release(struct inode *inode, struct file *file)
7508 {
7509 struct trace_array *tr = inode->i_private;
7510
7511 trace_array_put(tr);
7512
7513 if (file->f_mode & FMODE_READ)
7514 seq_release(inode, file);
7515
7516 return 0;
7517 }
7518
7519 static const struct file_operations tracing_err_log_fops = {
7520 .open = tracing_err_log_open,
7521 .write = tracing_err_log_write,
7522 .read = seq_read,
7523 .llseek = seq_lseek,
7524 .release = tracing_err_log_release,
7525 };
7526
7527 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7528 {
7529 struct trace_array *tr = inode->i_private;
7530 struct ftrace_buffer_info *info;
7531 int ret;
7532
7533 ret = tracing_check_open_get_tr(tr);
7534 if (ret)
7535 return ret;
7536
7537 info = kvzalloc(sizeof(*info), GFP_KERNEL);
7538 if (!info) {
7539 trace_array_put(tr);
7540 return -ENOMEM;
7541 }
7542
7543 mutex_lock(&trace_types_lock);
7544
7545 info->iter.tr = tr;
7546 info->iter.cpu_file = tracing_get_cpu(inode);
7547 info->iter.trace = tr->current_trace;
7548 info->iter.array_buffer = &tr->array_buffer;
7549 info->spare = NULL;
7550 /* Force reading ring buffer for first read */
7551 info->read = (unsigned int)-1;
7552
7553 filp->private_data = info;
7554
7555 tr->trace_ref++;
7556
7557 mutex_unlock(&trace_types_lock);
7558
7559 ret = nonseekable_open(inode, filp);
7560 if (ret < 0)
7561 trace_array_put(tr);
7562
7563 return ret;
7564 }
7565
7566 static __poll_t
7567 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7568 {
7569 struct ftrace_buffer_info *info = filp->private_data;
7570 struct trace_iterator *iter = &info->iter;
7571
7572 return trace_poll(iter, filp, poll_table);
7573 }
7574
7575 static ssize_t
7576 tracing_buffers_read(struct file *filp, char __user *ubuf,
7577 size_t count, loff_t *ppos)
7578 {
7579 struct ftrace_buffer_info *info = filp->private_data;
7580 struct trace_iterator *iter = &info->iter;
7581 ssize_t ret = 0;
7582 ssize_t size;
7583
7584 if (!count)
7585 return 0;
7586
7587 #ifdef CONFIG_TRACER_MAX_TRACE
7588 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7589 return -EBUSY;
7590 #endif
7591
7592 if (!info->spare) {
7593 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7594 iter->cpu_file);
7595 if (IS_ERR(info->spare)) {
7596 ret = PTR_ERR(info->spare);
7597 info->spare = NULL;
7598 } else {
7599 info->spare_cpu = iter->cpu_file;
7600 }
7601 }
7602 if (!info->spare)
7603 return ret;
7604
7605 /* Do we have previous read data to read? */
7606 if (info->read < PAGE_SIZE)
7607 goto read;
7608
7609 again:
7610 trace_access_lock(iter->cpu_file);
7611 ret = ring_buffer_read_page(iter->array_buffer->buffer,
7612 &info->spare,
7613 count,
7614 iter->cpu_file, 0);
7615 trace_access_unlock(iter->cpu_file);
7616
7617 if (ret < 0) {
7618 if (trace_empty(iter)) {
7619 if ((filp->f_flags & O_NONBLOCK))
7620 return -EAGAIN;
7621
7622 ret = wait_on_pipe(iter, 0);
7623 if (ret)
7624 return ret;
7625
7626 goto again;
7627 }
7628 return 0;
7629 }
7630
7631 info->read = 0;
7632 read:
7633 size = PAGE_SIZE - info->read;
7634 if (size > count)
7635 size = count;
7636
7637 ret = copy_to_user(ubuf, info->spare + info->read, size);
7638 if (ret == size)
7639 return -EFAULT;
7640
7641 size -= ret;
7642
7643 *ppos += size;
7644 info->read += size;
7645
7646 return size;
7647 }
7648
7649 static int tracing_buffers_release(struct inode *inode, struct file *file)
7650 {
7651 struct ftrace_buffer_info *info = file->private_data;
7652 struct trace_iterator *iter = &info->iter;
7653
7654 mutex_lock(&trace_types_lock);
7655
7656 iter->tr->trace_ref--;
7657
7658 __trace_array_put(iter->tr);
7659
7660 if (info->spare)
7661 ring_buffer_free_read_page(iter->array_buffer->buffer,
7662 info->spare_cpu, info->spare);
7663 kvfree(info);
7664
7665 mutex_unlock(&trace_types_lock);
7666
7667 return 0;
7668 }
7669
7670 struct buffer_ref {
7671 struct trace_buffer *buffer;
7672 void *page;
7673 int cpu;
7674 refcount_t refcount;
7675 };
7676
7677 static void buffer_ref_release(struct buffer_ref *ref)
7678 {
7679 if (!refcount_dec_and_test(&ref->refcount))
7680 return;
7681 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7682 kfree(ref);
7683 }
7684
7685 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7686 struct pipe_buffer *buf)
7687 {
7688 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7689
7690 buffer_ref_release(ref);
7691 buf->private = 0;
7692 }
7693
7694 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7695 struct pipe_buffer *buf)
7696 {
7697 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7698
7699 if (refcount_read(&ref->refcount) > INT_MAX/2)
7700 return false;
7701
7702 refcount_inc(&ref->refcount);
7703 return true;
7704 }
7705
7706 /* Pipe buffer operations for a buffer. */
7707 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7708 .release = buffer_pipe_buf_release,
7709 .get = buffer_pipe_buf_get,
7710 };
7711
7712 /*
7713 * Callback from splice_to_pipe(), if we need to release some pages
7714 * at the end of the spd in case we errored out while filling the pipe.
7715 */
7716 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7717 {
7718 struct buffer_ref *ref =
7719 (struct buffer_ref *)spd->partial[i].private;
7720
7721 buffer_ref_release(ref);
7722 spd->partial[i].private = 0;
7723 }
7724
7725 static ssize_t
7726 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7727 struct pipe_inode_info *pipe, size_t len,
7728 unsigned int flags)
7729 {
7730 struct ftrace_buffer_info *info = file->private_data;
7731 struct trace_iterator *iter = &info->iter;
7732 struct partial_page partial_def[PIPE_DEF_BUFFERS];
7733 struct page *pages_def[PIPE_DEF_BUFFERS];
7734 struct splice_pipe_desc spd = {
7735 .pages = pages_def,
7736 .partial = partial_def,
7737 .nr_pages_max = PIPE_DEF_BUFFERS,
7738 .ops = &buffer_pipe_buf_ops,
7739 .spd_release = buffer_spd_release,
7740 };
7741 struct buffer_ref *ref;
7742 int entries, i;
7743 ssize_t ret = 0;
7744
7745 #ifdef CONFIG_TRACER_MAX_TRACE
7746 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7747 return -EBUSY;
7748 #endif
7749
7750 if (*ppos & (PAGE_SIZE - 1))
7751 return -EINVAL;
7752
7753 if (len & (PAGE_SIZE - 1)) {
7754 if (len < PAGE_SIZE)
7755 return -EINVAL;
7756 len &= PAGE_MASK;
7757 }
7758
7759 if (splice_grow_spd(pipe, &spd))
7760 return -ENOMEM;
7761
7762 again:
7763 trace_access_lock(iter->cpu_file);
7764 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7765
7766 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7767 struct page *page;
7768 int r;
7769
7770 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7771 if (!ref) {
7772 ret = -ENOMEM;
7773 break;
7774 }
7775
7776 refcount_set(&ref->refcount, 1);
7777 ref->buffer = iter->array_buffer->buffer;
7778 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7779 if (IS_ERR(ref->page)) {
7780 ret = PTR_ERR(ref->page);
7781 ref->page = NULL;
7782 kfree(ref);
7783 break;
7784 }
7785 ref->cpu = iter->cpu_file;
7786
7787 r = ring_buffer_read_page(ref->buffer, &ref->page,
7788 len, iter->cpu_file, 1);
7789 if (r < 0) {
7790 ring_buffer_free_read_page(ref->buffer, ref->cpu,
7791 ref->page);
7792 kfree(ref);
7793 break;
7794 }
7795
7796 page = virt_to_page(ref->page);
7797
7798 spd.pages[i] = page;
7799 spd.partial[i].len = PAGE_SIZE;
7800 spd.partial[i].offset = 0;
7801 spd.partial[i].private = (unsigned long)ref;
7802 spd.nr_pages++;
7803 *ppos += PAGE_SIZE;
7804
7805 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7806 }
7807
7808 trace_access_unlock(iter->cpu_file);
7809 spd.nr_pages = i;
7810
7811 /* did we read anything? */
7812 if (!spd.nr_pages) {
7813 if (ret)
7814 goto out;
7815
7816 ret = -EAGAIN;
7817 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7818 goto out;
7819
7820 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7821 if (ret)
7822 goto out;
7823
7824 goto again;
7825 }
7826
7827 ret = splice_to_pipe(pipe, &spd);
7828 out:
7829 splice_shrink_spd(&spd);
7830
7831 return ret;
7832 }
7833
7834 static const struct file_operations tracing_buffers_fops = {
7835 .open = tracing_buffers_open,
7836 .read = tracing_buffers_read,
7837 .poll = tracing_buffers_poll,
7838 .release = tracing_buffers_release,
7839 .splice_read = tracing_buffers_splice_read,
7840 .llseek = no_llseek,
7841 };
7842
7843 static ssize_t
7844 tracing_stats_read(struct file *filp, char __user *ubuf,
7845 size_t count, loff_t *ppos)
7846 {
7847 struct inode *inode = file_inode(filp);
7848 struct trace_array *tr = inode->i_private;
7849 struct array_buffer *trace_buf = &tr->array_buffer;
7850 int cpu = tracing_get_cpu(inode);
7851 struct trace_seq *s;
7852 unsigned long cnt;
7853 unsigned long long t;
7854 unsigned long usec_rem;
7855
7856 s = kmalloc(sizeof(*s), GFP_KERNEL);
7857 if (!s)
7858 return -ENOMEM;
7859
7860 trace_seq_init(s);
7861
7862 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7863 trace_seq_printf(s, "entries: %ld\n", cnt);
7864
7865 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7866 trace_seq_printf(s, "overrun: %ld\n", cnt);
7867
7868 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7869 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7870
7871 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7872 trace_seq_printf(s, "bytes: %ld\n", cnt);
7873
7874 if (trace_clocks[tr->clock_id].in_ns) {
7875 /* local or global for trace_clock */
7876 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7877 usec_rem = do_div(t, USEC_PER_SEC);
7878 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7879 t, usec_rem);
7880
7881 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7882 usec_rem = do_div(t, USEC_PER_SEC);
7883 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7884 } else {
7885 /* counter or tsc mode for trace_clock */
7886 trace_seq_printf(s, "oldest event ts: %llu\n",
7887 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7888
7889 trace_seq_printf(s, "now ts: %llu\n",
7890 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7891 }
7892
7893 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7894 trace_seq_printf(s, "dropped events: %ld\n", cnt);
7895
7896 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7897 trace_seq_printf(s, "read events: %ld\n", cnt);
7898
7899 count = simple_read_from_buffer(ubuf, count, ppos,
7900 s->buffer, trace_seq_used(s));
7901
7902 kfree(s);
7903
7904 return count;
7905 }
7906
7907 static const struct file_operations tracing_stats_fops = {
7908 .open = tracing_open_generic_tr,
7909 .read = tracing_stats_read,
7910 .llseek = generic_file_llseek,
7911 .release = tracing_release_generic_tr,
7912 };
7913
7914 #ifdef CONFIG_DYNAMIC_FTRACE
7915
7916 static ssize_t
7917 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7918 size_t cnt, loff_t *ppos)
7919 {
7920 ssize_t ret;
7921 char *buf;
7922 int r;
7923
7924 /* 256 should be plenty to hold the amount needed */
7925 buf = kmalloc(256, GFP_KERNEL);
7926 if (!buf)
7927 return -ENOMEM;
7928
7929 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7930 ftrace_update_tot_cnt,
7931 ftrace_number_of_pages,
7932 ftrace_number_of_groups);
7933
7934 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7935 kfree(buf);
7936 return ret;
7937 }
7938
7939 static const struct file_operations tracing_dyn_info_fops = {
7940 .open = tracing_open_generic,
7941 .read = tracing_read_dyn_info,
7942 .llseek = generic_file_llseek,
7943 };
7944 #endif /* CONFIG_DYNAMIC_FTRACE */
7945
7946 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7947 static void
7948 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7949 struct trace_array *tr, struct ftrace_probe_ops *ops,
7950 void *data)
7951 {
7952 tracing_snapshot_instance(tr);
7953 }
7954
7955 static void
7956 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7957 struct trace_array *tr, struct ftrace_probe_ops *ops,
7958 void *data)
7959 {
7960 struct ftrace_func_mapper *mapper = data;
7961 long *count = NULL;
7962
7963 if (mapper)
7964 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7965
7966 if (count) {
7967
7968 if (*count <= 0)
7969 return;
7970
7971 (*count)--;
7972 }
7973
7974 tracing_snapshot_instance(tr);
7975 }
7976
7977 static int
7978 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7979 struct ftrace_probe_ops *ops, void *data)
7980 {
7981 struct ftrace_func_mapper *mapper = data;
7982 long *count = NULL;
7983
7984 seq_printf(m, "%ps:", (void *)ip);
7985
7986 seq_puts(m, "snapshot");
7987
7988 if (mapper)
7989 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7990
7991 if (count)
7992 seq_printf(m, ":count=%ld\n", *count);
7993 else
7994 seq_puts(m, ":unlimited\n");
7995
7996 return 0;
7997 }
7998
7999 static int
8000 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8001 unsigned long ip, void *init_data, void **data)
8002 {
8003 struct ftrace_func_mapper *mapper = *data;
8004
8005 if (!mapper) {
8006 mapper = allocate_ftrace_func_mapper();
8007 if (!mapper)
8008 return -ENOMEM;
8009 *data = mapper;
8010 }
8011
8012 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8013 }
8014
8015 static void
8016 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8017 unsigned long ip, void *data)
8018 {
8019 struct ftrace_func_mapper *mapper = data;
8020
8021 if (!ip) {
8022 if (!mapper)
8023 return;
8024 free_ftrace_func_mapper(mapper, NULL);
8025 return;
8026 }
8027
8028 ftrace_func_mapper_remove_ip(mapper, ip);
8029 }
8030
8031 static struct ftrace_probe_ops snapshot_probe_ops = {
8032 .func = ftrace_snapshot,
8033 .print = ftrace_snapshot_print,
8034 };
8035
8036 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8037 .func = ftrace_count_snapshot,
8038 .print = ftrace_snapshot_print,
8039 .init = ftrace_snapshot_init,
8040 .free = ftrace_snapshot_free,
8041 };
8042
8043 static int
8044 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8045 char *glob, char *cmd, char *param, int enable)
8046 {
8047 struct ftrace_probe_ops *ops;
8048 void *count = (void *)-1;
8049 char *number;
8050 int ret;
8051
8052 if (!tr)
8053 return -ENODEV;
8054
8055 /* hash funcs only work with set_ftrace_filter */
8056 if (!enable)
8057 return -EINVAL;
8058
8059 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8060
8061 if (glob[0] == '!')
8062 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8063
8064 if (!param)
8065 goto out_reg;
8066
8067 number = strsep(&param, ":");
8068
8069 if (!strlen(number))
8070 goto out_reg;
8071
8072 /*
8073 * We use the callback data field (which is a pointer)
8074 * as our counter.
8075 */
8076 ret = kstrtoul(number, 0, (unsigned long *)&count);
8077 if (ret)
8078 return ret;
8079
8080 out_reg:
8081 ret = tracing_alloc_snapshot_instance(tr);
8082 if (ret < 0)
8083 goto out;
8084
8085 ret = register_ftrace_function_probe(glob, tr, ops, count);
8086
8087 out:
8088 return ret < 0 ? ret : 0;
8089 }
8090
8091 static struct ftrace_func_command ftrace_snapshot_cmd = {
8092 .name = "snapshot",
8093 .func = ftrace_trace_snapshot_callback,
8094 };
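
/*
 * User-space usage sketch (per the documented ftrace function-command
 * interface; exact behavior is defined by the callback above, and
 * "schedule" is just an example function name):
 *
 *	echo 'schedule:snapshot'   > set_ftrace_filter   # snapshot on every hit
 *	echo 'schedule:snapshot:3' > set_ftrace_filter   # only the first 3 hits
 *	echo '!schedule:snapshot'  > set_ftrace_filter   # remove the probe
 *
 * The optional ":count" selects snapshot_count_probe_ops above.
 */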
8095
8096 static __init int register_snapshot_cmd(void)
8097 {
8098 return register_ftrace_command(&ftrace_snapshot_cmd);
8099 }
8100 #else
8101 static inline __init int register_snapshot_cmd(void) { return 0; }
8102 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8103
8104 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8105 {
8106 if (WARN_ON(!tr->dir))
8107 return ERR_PTR(-ENODEV);
8108
8109 /* Top directory uses NULL as the parent */
8110 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8111 return NULL;
8112
8113 /* All sub buffers have a descriptor */
8114 return tr->dir;
8115 }
8116
8117 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8118 {
8119 struct dentry *d_tracer;
8120
8121 if (tr->percpu_dir)
8122 return tr->percpu_dir;
8123
8124 d_tracer = tracing_get_dentry(tr);
8125 if (IS_ERR(d_tracer))
8126 return NULL;
8127
8128 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8129
8130 MEM_FAIL(!tr->percpu_dir,
8131 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8132
8133 return tr->percpu_dir;
8134 }
8135
8136 static struct dentry *
8137 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8138 void *data, long cpu, const struct file_operations *fops)
8139 {
8140 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8141
8142 if (ret) /* See tracing_get_cpu() */
8143 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8144 return ret;
8145 }
8146
8147 static void
8148 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8149 {
8150 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8151 struct dentry *d_cpu;
8152 char cpu_dir[30]; /* 30 characters should be more than enough */
8153
8154 if (!d_percpu)
8155 return;
8156
8157 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8158 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8159 if (!d_cpu) {
8160 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8161 return;
8162 }
8163
8164 /* per cpu trace_pipe */
8165 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8166 tr, cpu, &tracing_pipe_fops);
8167
8168 /* per cpu trace */
8169 trace_create_cpu_file("trace", 0644, d_cpu,
8170 tr, cpu, &tracing_fops);
8171
8172 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8173 tr, cpu, &tracing_buffers_fops);
8174
8175 trace_create_cpu_file("stats", 0444, d_cpu,
8176 tr, cpu, &tracing_stats_fops);
8177
8178 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8179 tr, cpu, &tracing_entries_fops);
8180
8181 #ifdef CONFIG_TRACER_SNAPSHOT
8182 trace_create_cpu_file("snapshot", 0644, d_cpu,
8183 tr, cpu, &snapshot_fops);
8184
8185 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8186 tr, cpu, &snapshot_raw_fops);
8187 #endif
8188 }
8189
8190 #ifdef CONFIG_FTRACE_SELFTEST
8191 /* Let selftest have access to static functions in this file */
8192 #include "trace_selftest.c"
8193 #endif
8194
8195 static ssize_t
8196 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8197 loff_t *ppos)
8198 {
8199 struct trace_option_dentry *topt = filp->private_data;
8200 char *buf;
8201
8202 if (topt->flags->val & topt->opt->bit)
8203 buf = "1\n";
8204 else
8205 buf = "0\n";
8206
8207 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8208 }
8209
8210 static ssize_t
8211 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8212 loff_t *ppos)
8213 {
8214 struct trace_option_dentry *topt = filp->private_data;
8215 unsigned long val;
8216 int ret;
8217
8218 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8219 if (ret)
8220 return ret;
8221
8222 if (val != 0 && val != 1)
8223 return -EINVAL;
8224
8225 if (!!(topt->flags->val & topt->opt->bit) != val) {
8226 mutex_lock(&trace_types_lock);
8227 ret = __set_tracer_option(topt->tr, topt->flags,
8228 topt->opt, !val);
8229 mutex_unlock(&trace_types_lock);
8230 if (ret)
8231 return ret;
8232 }
8233
8234 *ppos += cnt;
8235
8236 return cnt;
8237 }
8238
8239
8240 static const struct file_operations trace_options_fops = {
8241 .open = tracing_open_generic,
8242 .read = trace_options_read,
8243 .write = trace_options_write,
8244 .llseek = generic_file_llseek,
8245 };
8246
8247 /*
8248 * In order to pass in both the trace_array descriptor as well as the index
8249 * to the flag that the trace option file represents, the trace_array
8250 * has a character array of trace_flags_index[], which holds the index
8251 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8252 * The address of this character array is passed to the flag option file
8253 * read/write callbacks.
8254 *
8255 * In order to extract both the index and the trace_array descriptor,
8256 * get_tr_index() uses the following algorithm.
8257 *
8258 * idx = *ptr;
8259 *
8260 * As the pointer itself contains the address of the index (remember
8261 * index[1] == 1).
8262 *
8263 * Then to get the trace_array descriptor, by subtracting that index
8264 * from the ptr, we get to the start of the index itself.
8265 *
8266 * ptr - idx == &index[0]
8267 *
8268 * Then a simple container_of() from that pointer gets us to the
8269 * trace_array descriptor.
8270 */
8271 static void get_tr_index(void *data, struct trace_array **ptr,
8272 unsigned int *pindex)
8273 {
8274 *pindex = *(unsigned char *)data;
8275
8276 *ptr = container_of(data - *pindex, struct trace_array,
8277 trace_flags_index);
8278 }
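
/*
 * Worked example (hypothetical values): if @data points at
 * tr->trace_flags_index[3], then *data == 3, so data - 3 is
 * &tr->trace_flags_index[0] and container_of() recovers tr.
 */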
8279
8280 static ssize_t
8281 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8282 loff_t *ppos)
8283 {
8284 void *tr_index = filp->private_data;
8285 struct trace_array *tr;
8286 unsigned int index;
8287 char *buf;
8288
8289 get_tr_index(tr_index, &tr, &index);
8290
8291 if (tr->trace_flags & (1 << index))
8292 buf = "1\n";
8293 else
8294 buf = "0\n";
8295
8296 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8297 }
8298
8299 static ssize_t
8300 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8301 loff_t *ppos)
8302 {
8303 void *tr_index = filp->private_data;
8304 struct trace_array *tr;
8305 unsigned int index;
8306 unsigned long val;
8307 int ret;
8308
8309 get_tr_index(tr_index, &tr, &index);
8310
8311 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8312 if (ret)
8313 return ret;
8314
8315 if (val != 0 && val != 1)
8316 return -EINVAL;
8317
8318 mutex_lock(&event_mutex);
8319 mutex_lock(&trace_types_lock);
8320 ret = set_tracer_flag(tr, 1 << index, val);
8321 mutex_unlock(&trace_types_lock);
8322 mutex_unlock(&event_mutex);
8323
8324 if (ret < 0)
8325 return ret;
8326
8327 *ppos += cnt;
8328
8329 return cnt;
8330 }
8331
8332 static const struct file_operations trace_options_core_fops = {
8333 .open = tracing_open_generic,
8334 .read = trace_options_core_read,
8335 .write = trace_options_core_write,
8336 .llseek = generic_file_llseek,
8337 };
8338
8339 struct dentry *trace_create_file(const char *name,
8340 umode_t mode,
8341 struct dentry *parent,
8342 void *data,
8343 const struct file_operations *fops)
8344 {
8345 struct dentry *ret;
8346
8347 ret = tracefs_create_file(name, mode, parent, data, fops);
8348 if (!ret)
8349 pr_warn("Could not create tracefs '%s' entry\n", name);
8350
8351 return ret;
8352 }
8353
8354
8355 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8356 {
8357 struct dentry *d_tracer;
8358
8359 if (tr->options)
8360 return tr->options;
8361
8362 d_tracer = tracing_get_dentry(tr);
8363 if (IS_ERR(d_tracer))
8364 return NULL;
8365
8366 tr->options = tracefs_create_dir("options", d_tracer);
8367 if (!tr->options) {
8368 pr_warn("Could not create tracefs directory 'options'\n");
8369 return NULL;
8370 }
8371
8372 return tr->options;
8373 }
8374
8375 static void
8376 create_trace_option_file(struct trace_array *tr,
8377 struct trace_option_dentry *topt,
8378 struct tracer_flags *flags,
8379 struct tracer_opt *opt)
8380 {
8381 struct dentry *t_options;
8382
8383 t_options = trace_options_init_dentry(tr);
8384 if (!t_options)
8385 return;
8386
8387 topt->flags = flags;
8388 topt->opt = opt;
8389 topt->tr = tr;
8390
8391 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8392 &trace_options_fops);
8393
8394 }
8395
8396 static void
8397 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8398 {
8399 struct trace_option_dentry *topts;
8400 struct trace_options *tr_topts;
8401 struct tracer_flags *flags;
8402 struct tracer_opt *opts;
8403 int cnt;
8404 int i;
8405
8406 if (!tracer)
8407 return;
8408
8409 flags = tracer->flags;
8410
8411 if (!flags || !flags->opts)
8412 return;
8413
8414 /*
8415 * If this is an instance, only create flags for tracers
8416 * the instance may have.
8417 */
8418 if (!trace_ok_for_array(tracer, tr))
8419 return;
8420
8421 for (i = 0; i < tr->nr_topts; i++) {
8422 /* Make sure there are no duplicate flags. */
8423 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8424 return;
8425 }
8426
8427 opts = flags->opts;
8428
8429 for (cnt = 0; opts[cnt].name; cnt++)
8430 ;
8431
8432 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8433 if (!topts)
8434 return;
8435
8436 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8437 GFP_KERNEL);
8438 if (!tr_topts) {
8439 kfree(topts);
8440 return;
8441 }
8442
8443 tr->topts = tr_topts;
8444 tr->topts[tr->nr_topts].tracer = tracer;
8445 tr->topts[tr->nr_topts].topts = topts;
8446 tr->nr_topts++;
8447
8448 for (cnt = 0; opts[cnt].name; cnt++) {
8449 create_trace_option_file(tr, &topts[cnt], flags,
8450 &opts[cnt]);
8451 MEM_FAIL(topts[cnt].entry == NULL,
8452 "Failed to create trace option: %s",
8453 opts[cnt].name);
8454 }
8455 }
8456
8457 static struct dentry *
8458 create_trace_option_core_file(struct trace_array *tr,
8459 const char *option, long index)
8460 {
8461 struct dentry *t_options;
8462
8463 t_options = trace_options_init_dentry(tr);
8464 if (!t_options)
8465 return NULL;
8466
8467 return trace_create_file(option, 0644, t_options,
8468 (void *)&tr->trace_flags_index[index],
8469 &trace_options_core_fops);
8470 }
8471
8472 static void create_trace_options_dir(struct trace_array *tr)
8473 {
8474 struct dentry *t_options;
8475 bool top_level = tr == &global_trace;
8476 int i;
8477
8478 t_options = trace_options_init_dentry(tr);
8479 if (!t_options)
8480 return;
8481
8482 for (i = 0; trace_options[i]; i++) {
8483 if (top_level ||
8484 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8485 create_trace_option_core_file(tr, trace_options[i], i);
8486 }
8487 }
8488
8489 static ssize_t
8490 rb_simple_read(struct file *filp, char __user *ubuf,
8491 size_t cnt, loff_t *ppos)
8492 {
8493 struct trace_array *tr = filp->private_data;
8494 char buf[64];
8495 int r;
8496
8497 r = tracer_tracing_is_on(tr);
8498 r = sprintf(buf, "%d\n", r);
8499
8500 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8501 }
8502
8503 static ssize_t
8504 rb_simple_write(struct file *filp, const char __user *ubuf,
8505 size_t cnt, loff_t *ppos)
8506 {
8507 struct trace_array *tr = filp->private_data;
8508 struct trace_buffer *buffer = tr->array_buffer.buffer;
8509 unsigned long val;
8510 int ret;
8511
8512 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8513 if (ret)
8514 return ret;
8515
8516 if (buffer) {
8517 mutex_lock(&trace_types_lock);
8518 if (!!val == tracer_tracing_is_on(tr)) {
8519 val = 0; /* do nothing */
8520 } else if (val) {
8521 tracer_tracing_on(tr);
8522 if (tr->current_trace->start)
8523 tr->current_trace->start(tr);
8524 } else {
8525 tracer_tracing_off(tr);
8526 if (tr->current_trace->stop)
8527 tr->current_trace->stop(tr);
8528 }
8529 mutex_unlock(&trace_types_lock);
8530 }
8531
8532 (*ppos)++;
8533
8534 return cnt;
8535 }
8536
8537 static const struct file_operations rb_simple_fops = {
8538 .open = tracing_open_generic_tr,
8539 .read = rb_simple_read,
8540 .write = rb_simple_write,
8541 .release = tracing_release_generic_tr,
8542 .llseek = default_llseek,
8543 };
8544
8545 static ssize_t
8546 buffer_percent_read(struct file *filp, char __user *ubuf,
8547 size_t cnt, loff_t *ppos)
8548 {
8549 struct trace_array *tr = filp->private_data;
8550 char buf[64];
8551 int r;
8552
8553 r = tr->buffer_percent;
8554 r = sprintf(buf, "%d\n", r);
8555
8556 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8557 }
8558
8559 static ssize_t
8560 buffer_percent_write(struct file *filp, const char __user *ubuf,
8561 size_t cnt, loff_t *ppos)
8562 {
8563 struct trace_array *tr = filp->private_data;
8564 unsigned long val;
8565 int ret;
8566
8567 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8568 if (ret)
8569 return ret;
8570
8571 if (val > 100)
8572 return -EINVAL;
8573
8574 tr->buffer_percent = val;
8575
8576 (*ppos)++;
8577
8578 return cnt;
8579 }
8580
8581 static const struct file_operations buffer_percent_fops = {
8582 .open = tracing_open_generic_tr,
8583 .read = buffer_percent_read,
8584 .write = buffer_percent_write,
8585 .release = tracing_release_generic_tr,
8586 .llseek = default_llseek,
8587 };
8588
8589 static struct dentry *trace_instance_dir;
8590
8591 static void
8592 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8593
8594 static int
8595 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8596 {
8597 enum ring_buffer_flags rb_flags;
8598
8599 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8600
8601 buf->tr = tr;
8602
8603 buf->buffer = ring_buffer_alloc(size, rb_flags);
8604 if (!buf->buffer)
8605 return -ENOMEM;
8606
8607 buf->data = alloc_percpu(struct trace_array_cpu);
8608 if (!buf->data) {
8609 ring_buffer_free(buf->buffer);
8610 buf->buffer = NULL;
8611 return -ENOMEM;
8612 }
8613
8614 /* Allocate the first page for all buffers */
8615 set_buffer_entries(&tr->array_buffer,
8616 ring_buffer_size(tr->array_buffer.buffer, 0));
8617
8618 return 0;
8619 }
8620
8621 static int allocate_trace_buffers(struct trace_array *tr, int size)
8622 {
8623 int ret;
8624
8625 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8626 if (ret)
8627 return ret;
8628
8629 #ifdef CONFIG_TRACER_MAX_TRACE
8630 ret = allocate_trace_buffer(tr, &tr->max_buffer,
8631 allocate_snapshot ? size : 1);
8632 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8633 ring_buffer_free(tr->array_buffer.buffer);
8634 tr->array_buffer.buffer = NULL;
8635 free_percpu(tr->array_buffer.data);
8636 tr->array_buffer.data = NULL;
8637 return -ENOMEM;
8638 }
8639 tr->allocated_snapshot = allocate_snapshot;
8640
8641 /*
8642 * Only the top level trace array gets its snapshot allocated
8643 * from the kernel command line.
8644 */
8645 allocate_snapshot = false;
8646 #endif
8647
8648 return 0;
8649 }
8650
8651 static void free_trace_buffer(struct array_buffer *buf)
8652 {
8653 if (buf->buffer) {
8654 ring_buffer_free(buf->buffer);
8655 buf->buffer = NULL;
8656 free_percpu(buf->data);
8657 buf->data = NULL;
8658 }
8659 }
8660
8661 static void free_trace_buffers(struct trace_array *tr)
8662 {
8663 if (!tr)
8664 return;
8665
8666 free_trace_buffer(&tr->array_buffer);
8667
8668 #ifdef CONFIG_TRACER_MAX_TRACE
8669 free_trace_buffer(&tr->max_buffer);
8670 #endif
8671 }
8672
8673 static void init_trace_flags_index(struct trace_array *tr)
8674 {
8675 int i;
8676
8677 /* Used by the trace options files */
8678 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8679 tr->trace_flags_index[i] = i;
8680 }
8681
8682 static void __update_tracer_options(struct trace_array *tr)
8683 {
8684 struct tracer *t;
8685
8686 for (t = trace_types; t; t = t->next)
8687 add_tracer_options(tr, t);
8688 }
8689
8690 static void update_tracer_options(struct trace_array *tr)
8691 {
8692 mutex_lock(&trace_types_lock);
8693 tracer_options_updated = true;
8694 __update_tracer_options(tr);
8695 mutex_unlock(&trace_types_lock);
8696 }
8697
8698 /* Must have trace_types_lock held */
8699 struct trace_array *trace_array_find(const char *instance)
8700 {
8701 struct trace_array *tr, *found = NULL;
8702
8703 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8704 if (tr->name && strcmp(tr->name, instance) == 0) {
8705 found = tr;
8706 break;
8707 }
8708 }
8709
8710 return found;
8711 }
8712
8713 struct trace_array *trace_array_find_get(const char *instance)
8714 {
8715 struct trace_array *tr;
8716
8717 mutex_lock(&trace_types_lock);
8718 tr = trace_array_find(instance);
8719 if (tr)
8720 tr->ref++;
8721 mutex_unlock(&trace_types_lock);
8722
8723 return tr;
8724 }
8725
8726 static int trace_array_create_dir(struct trace_array *tr)
8727 {
8728 int ret;
8729
8730 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8731 if (!tr->dir)
8732 return -EINVAL;
8733
8734 ret = event_trace_add_tracer(tr->dir, tr);
8735 if (ret) {
8736 tracefs_remove(tr->dir);
8737 return ret;
8738 }
8739
8740 init_tracer_tracefs(tr, tr->dir);
8741 __update_tracer_options(tr);
8742
8743 return ret;
8744 }
8745
8746 static struct trace_array *trace_array_create(const char *name)
8747 {
8748 struct trace_array *tr;
8749 int ret;
8750
8751 ret = -ENOMEM;
8752 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8753 if (!tr)
8754 return ERR_PTR(ret);
8755
8756 tr->name = kstrdup(name, GFP_KERNEL);
8757 if (!tr->name)
8758 goto out_free_tr;
8759
8760 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8761 goto out_free_tr;
8762
8763 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8764
8765 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8766
8767 raw_spin_lock_init(&tr->start_lock);
8768
8769 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8770
8771 tr->current_trace = &nop_trace;
8772
8773 INIT_LIST_HEAD(&tr->systems);
8774 INIT_LIST_HEAD(&tr->events);
8775 INIT_LIST_HEAD(&tr->hist_vars);
8776 INIT_LIST_HEAD(&tr->err_log);
8777
8778 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8779 goto out_free_tr;
8780
8781 if (ftrace_allocate_ftrace_ops(tr) < 0)
8782 goto out_free_tr;
8783
8784 ftrace_init_trace_array(tr);
8785
8786 init_trace_flags_index(tr);
8787
8788 if (trace_instance_dir) {
8789 ret = trace_array_create_dir(tr);
8790 if (ret)
8791 goto out_free_tr;
8792 } else
8793 __trace_early_add_events(tr);
8794
8795 list_add(&tr->list, &ftrace_trace_arrays);
8796
8797 tr->ref++;
8798
8799 return tr;
8800
8801 out_free_tr:
8802 ftrace_free_ftrace_ops(tr);
8803 free_trace_buffers(tr);
8804 free_cpumask_var(tr->tracing_cpumask);
8805 kfree(tr->name);
8806 kfree(tr);
8807
8808 return ERR_PTR(ret);
8809 }
8810
8811 static int instance_mkdir(const char *name)
8812 {
8813 struct trace_array *tr;
8814 int ret;
8815
8816 mutex_lock(&event_mutex);
8817 mutex_lock(&trace_types_lock);
8818
8819 ret = -EEXIST;
8820 if (trace_array_find(name))
8821 goto out_unlock;
8822
8823 tr = trace_array_create(name);
8824
8825 ret = PTR_ERR_OR_ZERO(tr);
8826
8827 out_unlock:
8828 mutex_unlock(&trace_types_lock);
8829 mutex_unlock(&event_mutex);
8830 return ret;
8831 }
8832
8833 /**
8834 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8835 * @name: The name of the trace array to be looked up/created.
8836 *
8837 * Returns pointer to trace array with given name.
8838 * NULL, if it cannot be created.
8839 *
8840 * NOTE: This function increments the reference counter associated with the
8841 * trace array returned. This makes sure it cannot be freed while in use.
8842 * Use trace_array_put() once the trace array is no longer needed.
8843 * If the trace_array is to be freed, trace_array_destroy() needs to
8844 * be called after the trace_array_put(), or simply let user space delete
8845 * it from the tracefs instances directory. But until the
8846 * trace_array_put() is called, user space can not delete it.
8847 *
8848 */
8849 struct trace_array *trace_array_get_by_name(const char *name)
8850 {
8851 struct trace_array *tr;
8852
8853 mutex_lock(&event_mutex);
8854 mutex_lock(&trace_types_lock);
8855
8856 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8857 if (tr->name && strcmp(tr->name, name) == 0)
8858 goto out_unlock;
8859 }
8860
8861 tr = trace_array_create(name);
8862
8863 if (IS_ERR(tr))
8864 tr = NULL;
8865 out_unlock:
8866 if (tr)
8867 tr->ref++;
8868
8869 mutex_unlock(&trace_types_lock);
8870 mutex_unlock(&event_mutex);
8871 return tr;
8872 }
8873 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
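
/*
 * Usage sketch for a module (illustrative only; "my_inst" is a made-up
 * instance name), following the refcounting rules in the comment above:
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_inst");
 *
 *	if (tr) {
 *		...
 *		trace_array_put(tr);
 *		// optionally remove the instance afterwards:
 *		trace_array_destroy(tr);
 *	}
 */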
8874
8875 static int __remove_instance(struct trace_array *tr)
8876 {
8877 int i;
8878
8879 /* Reference counter for a newly created trace array = 1. */
8880 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8881 return -EBUSY;
8882
8883 list_del(&tr->list);
8884
8885 /* Disable all the flags that were enabled coming in */
8886 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8887 if ((1 << i) & ZEROED_TRACE_FLAGS)
8888 set_tracer_flag(tr, 1 << i, 0);
8889 }
8890
8891 tracing_set_nop(tr);
8892 clear_ftrace_function_probes(tr);
8893 event_trace_del_tracer(tr);
8894 ftrace_clear_pids(tr);
8895 ftrace_destroy_function_files(tr);
8896 tracefs_remove(tr->dir);
8897 free_trace_buffers(tr);
8898 clear_tracing_err_log(tr);
8899
8900 for (i = 0; i < tr->nr_topts; i++) {
8901 kfree(tr->topts[i].topts);
8902 }
8903 kfree(tr->topts);
8904
8905 free_cpumask_var(tr->tracing_cpumask);
8906 kfree(tr->name);
8907 kfree(tr);
8908
8909 return 0;
8910 }
8911
8912 int trace_array_destroy(struct trace_array *this_tr)
8913 {
8914 struct trace_array *tr;
8915 int ret;
8916
8917 if (!this_tr)
8918 return -EINVAL;
8919
8920 mutex_lock(&event_mutex);
8921 mutex_lock(&trace_types_lock);
8922
8923 ret = -ENODEV;
8924
8925 /* Making sure trace array exists before destroying it. */
8926 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8927 if (tr == this_tr) {
8928 ret = __remove_instance(tr);
8929 break;
8930 }
8931 }
8932
8933 mutex_unlock(&trace_types_lock);
8934 mutex_unlock(&event_mutex);
8935
8936 return ret;
8937 }
8938 EXPORT_SYMBOL_GPL(trace_array_destroy);
8939
8940 static int instance_rmdir(const char *name)
8941 {
8942 struct trace_array *tr;
8943 int ret;
8944
8945 mutex_lock(&event_mutex);
8946 mutex_lock(&trace_types_lock);
8947
8948 ret = -ENODEV;
8949 tr = trace_array_find(name);
8950 if (tr)
8951 ret = __remove_instance(tr);
8952
8953 mutex_unlock(&trace_types_lock);
8954 mutex_unlock(&event_mutex);
8955
8956 return ret;
8957 }
8958
8959 static __init void create_trace_instances(struct dentry *d_tracer)
8960 {
8961 struct trace_array *tr;
8962
8963 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8964 instance_mkdir,
8965 instance_rmdir);
8966 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8967 return;
8968
8969 mutex_lock(&event_mutex);
8970 mutex_lock(&trace_types_lock);
8971
8972 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8973 if (!tr->name)
8974 continue;
8975 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
8976 "Failed to create instance directory\n"))
8977 break;
8978 }
8979
8980 mutex_unlock(&trace_types_lock);
8981 mutex_unlock(&event_mutex);
8982 }
8983
8984 static void
8985 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8986 {
8987 struct trace_event_file *file;
8988 int cpu;
8989
8990 trace_create_file("available_tracers", 0444, d_tracer,
8991 tr, &show_traces_fops);
8992
8993 trace_create_file("current_tracer", 0644, d_tracer,
8994 tr, &set_tracer_fops);
8995
8996 trace_create_file("tracing_cpumask", 0644, d_tracer,
8997 tr, &tracing_cpumask_fops);
8998
8999 trace_create_file("trace_options", 0644, d_tracer,
9000 tr, &tracing_iter_fops);
9001
9002 trace_create_file("trace", 0644, d_tracer,
9003 tr, &tracing_fops);
9004
9005 trace_create_file("trace_pipe", 0444, d_tracer,
9006 tr, &tracing_pipe_fops);
9007
9008 trace_create_file("buffer_size_kb", 0644, d_tracer,
9009 tr, &tracing_entries_fops);
9010
9011 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9012 tr, &tracing_total_entries_fops);
9013
9014 trace_create_file("free_buffer", 0200, d_tracer,
9015 tr, &tracing_free_buffer_fops);
9016
9017 trace_create_file("trace_marker", 0220, d_tracer,
9018 tr, &tracing_mark_fops);
9019
9020 file = __find_event_file(tr, "ftrace", "print");
9021 if (file && file->dir)
9022 trace_create_file("trigger", 0644, file->dir, file,
9023 &event_trigger_fops);
9024 tr->trace_marker_file = file;
9025
9026 trace_create_file("trace_marker_raw", 0220, d_tracer,
9027 tr, &tracing_mark_raw_fops);
9028
9029 trace_create_file("trace_clock", 0644, d_tracer, tr,
9030 &trace_clock_fops);
9031
9032 trace_create_file("tracing_on", 0644, d_tracer,
9033 tr, &rb_simple_fops);
9034
9035 trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9036 &trace_time_stamp_mode_fops);
9037
9038 tr->buffer_percent = 50;
9039
9040 trace_create_file("buffer_percent", 0444, d_tracer,
9041 tr, &buffer_percent_fops);
9042
9043 create_trace_options_dir(tr);
9044
9045 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9046 trace_create_maxlat_file(tr, d_tracer);
9047 #endif
9048
9049 if (ftrace_create_function_files(tr, d_tracer))
9050 MEM_FAIL(1, "Could not allocate function filter files");
9051
9052 #ifdef CONFIG_TRACER_SNAPSHOT
9053 trace_create_file("snapshot", 0644, d_tracer,
9054 tr, &snapshot_fops);
9055 #endif
9056
9057 trace_create_file("error_log", 0644, d_tracer,
9058 tr, &tracing_err_log_fops);
9059
9060 for_each_tracing_cpu(cpu)
9061 tracing_init_tracefs_percpu(tr, cpu);
9062
9063 ftrace_init_tracefs(tr, d_tracer);
9064 }
9065
9066 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9067 {
9068 struct vfsmount *mnt;
9069 struct file_system_type *type;
9070
9071 /*
9072 * To maintain backward compatibility for tools that mount
9073 * debugfs to get to the tracing facility, tracefs is automatically
9074 * mounted to the debugfs/tracing directory.
9075 */
9076 type = get_fs_type("tracefs");
9077 if (!type)
9078 return NULL;
9079 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9080 put_filesystem(type);
9081 if (IS_ERR(mnt))
9082 return NULL;
9083 mntget(mnt);
9084
9085 return mnt;
9086 }
9087
9088 /**
9089 * tracing_init_dentry - initialize top level trace array
9090 *
9091 * This is called when creating files or directories in the tracing
9092 * directory. It is called via fs_initcall() by any of the boot up code
9093 * and expects to return the dentry of the top level tracing directory.
9094 */
9095 int tracing_init_dentry(void)
9096 {
9097 struct trace_array *tr = &global_trace;
9098
9099 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9100 pr_warn("Tracing disabled due to lockdown\n");
9101 return -EPERM;
9102 }
9103
9104 /* The top level trace array uses NULL as parent */
9105 if (tr->dir)
9106 return 0;
9107
9108 if (WARN_ON(!tracefs_initialized()))
9109 return -ENODEV;
9110
9111 /*
9112 * As there may still be users that expect the tracing
9113 * files to exist in debugfs/tracing, we must automount
9114 * the tracefs file system there, so older tools still
9115 * work with the newer kernel.
9116 */
9117 tr->dir = debugfs_create_automount("tracing", NULL,
9118 trace_automount, NULL);
9119
9120 return 0;
9121 }
9122
9123 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9124 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9125
9126 static void __init trace_eval_init(void)
9127 {
9128 int len;
9129
9130 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9131 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9132 }
9133
9134 #ifdef CONFIG_MODULES
9135 static void trace_module_add_evals(struct module *mod)
9136 {
9137 if (!mod->num_trace_evals)
9138 return;
9139
9140 /*
9141 * Modules with bad taint do not have events created;
9142 * do not bother with enums either.
9143 */
9144 if (trace_module_has_bad_taint(mod))
9145 return;
9146
9147 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9148 }
9149
9150 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9151 static void trace_module_remove_evals(struct module *mod)
9152 {
9153 union trace_eval_map_item *map;
9154 union trace_eval_map_item **last = &trace_eval_maps;
9155
9156 if (!mod->num_trace_evals)
9157 return;
9158
9159 mutex_lock(&trace_eval_mutex);
9160
9161 map = trace_eval_maps;
9162
9163 while (map) {
9164 if (map->head.mod == mod)
9165 break;
9166 map = trace_eval_jmp_to_tail(map);
9167 last = &map->tail.next;
9168 map = map->tail.next;
9169 }
9170 if (!map)
9171 goto out;
9172
9173 *last = trace_eval_jmp_to_tail(map)->tail.next;
9174 kfree(map);
9175 out:
9176 mutex_unlock(&trace_eval_mutex);
9177 }
9178 #else
9179 static inline void trace_module_remove_evals(struct module *mod) { }
9180 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9181
9182 static int trace_module_notify(struct notifier_block *self,
9183 unsigned long val, void *data)
9184 {
9185 struct module *mod = data;
9186
9187 switch (val) {
9188 case MODULE_STATE_COMING:
9189 trace_module_add_evals(mod);
9190 break;
9191 case MODULE_STATE_GOING:
9192 trace_module_remove_evals(mod);
9193 break;
9194 }
9195
9196 return NOTIFY_OK;
9197 }
9198
9199 static struct notifier_block trace_module_nb = {
9200 .notifier_call = trace_module_notify,
9201 .priority = 0,
9202 };
9203 #endif /* CONFIG_MODULES */
9204
9205 static __init int tracer_init_tracefs(void)
9206 {
9207 int ret;
9208
9209 trace_access_lock_init();
9210
9211 ret = tracing_init_dentry();
9212 if (ret)
9213 return 0;
9214
9215 event_trace_init();
9216
9217 init_tracer_tracefs(&global_trace, NULL);
9218 ftrace_init_tracefs_toplevel(&global_trace, NULL);
9219
9220 trace_create_file("tracing_thresh", 0644, NULL,
9221 &global_trace, &tracing_thresh_fops);
9222
9223 trace_create_file("README", 0444, NULL,
9224 NULL, &tracing_readme_fops);
9225
9226 trace_create_file("saved_cmdlines", 0444, NULL,
9227 NULL, &tracing_saved_cmdlines_fops);
9228
9229 trace_create_file("saved_cmdlines_size", 0644, NULL,
9230 NULL, &tracing_saved_cmdlines_size_fops);
9231
9232 trace_create_file("saved_tgids", 0444, NULL,
9233 NULL, &tracing_saved_tgids_fops);
9234
9235 trace_eval_init();
9236
9237 trace_create_eval_file(NULL);
9238
9239 #ifdef CONFIG_MODULES
9240 register_module_notifier(&trace_module_nb);
9241 #endif
9242
9243 #ifdef CONFIG_DYNAMIC_FTRACE
9244 trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9245 NULL, &tracing_dyn_info_fops);
9246 #endif
9247
9248 create_trace_instances(NULL);
9249
9250 update_tracer_options(&global_trace);
9251
9252 return 0;
9253 }
9254
9255 static int trace_panic_handler(struct notifier_block *this,
9256 unsigned long event, void *unused)
9257 {
9258 if (ftrace_dump_on_oops)
9259 ftrace_dump(ftrace_dump_on_oops);
9260 return NOTIFY_OK;
9261 }
9262
9263 static struct notifier_block trace_panic_notifier = {
9264 .notifier_call = trace_panic_handler,
9265 .next = NULL,
9266 .priority = 150 /* priority: INT_MAX >= x >= 0 */
9267 };
9268
9269 static int trace_die_handler(struct notifier_block *self,
9270 unsigned long val,
9271 void *data)
9272 {
9273 switch (val) {
9274 case DIE_OOPS:
9275 if (ftrace_dump_on_oops)
9276 ftrace_dump(ftrace_dump_on_oops);
9277 break;
9278 default:
9279 break;
9280 }
9281 return NOTIFY_OK;
9282 }
9283
9284 static struct notifier_block trace_die_notifier = {
9285 .notifier_call = trace_die_handler,
9286 .priority = 200
9287 };
9288
/*
 * printk is limited to a maximum of 1024 bytes; we really don't need
 * it that big. Nothing should be printing 1000 characters anyway.
 */
#define TRACE_MAX_PRINT		1000

/*
 * Define KERN_TRACE here so that we have a single place to change
 * the log level used for the ftrace dump output.
 */
#define KERN_TRACE		KERN_EMERG

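/*
 * Flush the contents of a trace_seq to the console at KERN_TRACE level
 * and re-initialize the seq for the next chunk. Used by ftrace_dump()
 * below, where the normal seq_file output path is not available.
 */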
void
trace_printk_seq(struct trace_seq *s)
{
	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code. Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* should already be zero-terminated, but we are paranoid */
	s->buffer[s->seq.len] = 0;

	printk(KERN_TRACE "%s", s->buffer);

	trace_seq_init(s);
}

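/*
 * Set up an iterator over the global trace array without going through
 * the usual tracefs open path. The caller (such as ftrace_dump())
 * supplies the iterator storage itself, which lets this be used from
 * crash context where allocation is not an option.
 */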
void trace_init_global_iter(struct trace_iterator *iter)
{
	iter->tr = &global_trace;
	iter->trace = iter->tr->current_trace;
	iter->cpu_file = RING_BUFFER_ALL_CPUS;
	iter->array_buffer = &global_trace.array_buffer;

	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
}

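/*
 * Dump the ring buffer contents to the console. This is the path used
 * on oops/panic (and by sysrq-z), so it avoids allocations, turns off
 * tracing, and serializes callers through the dump_running counter:
 * only the first caller actually dumps, later ones simply return.
 */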
void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	static atomic_t dump_running;
	struct trace_array *tr = &global_trace;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0, cpu;

	/* Only allow one dump user at a time. */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracing_off();

	local_irq_save(flags);
	printk_nmi_direct_enter();

	/* Simulate the iterator */
	trace_init_global_iter(&iter);
	/* Cannot use kmalloc for iter.temp */
	iter.temp = static_temp_buf;
	iter.temp_size = STATIC_TEMP_BUF_SIZE;

	for_each_tracing_cpu(cpu) {
		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}

	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;

	switch (oops_dump_mode) {
	case DUMP_ALL:
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
		break;
	case DUMP_ORIG:
		iter.cpu_file = raw_smp_processor_id();
		break;
	case DUMP_NONE:
		goto out_enable;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
	}

	printk(KERN_TRACE "Dumping ftrace buffer:\n");

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("# MAY BE MISSING FUNCTION EVENTS\n");
	}
	/*
	 * We need to stop all tracing on all CPUs to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We read everything we can and then
	 * release the locks again.
	 */

	while (!trace_empty(&iter)) {

		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);
		}
		touch_nmi_watchdog();

		trace_printk_seq(&iter.seq);
	}

	if (!cnt)
		printk(KERN_TRACE " (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

 out_enable:
	tr->trace_flags |= old_userobj;

	for_each_tracing_cpu(cpu) {
		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}
	atomic_dec(&dump_running);
	printk_nmi_direct_exit();
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ftrace_dump);

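/*
 * Split a command string on whitespace with argv_split() and hand the
 * resulting argc/argv to @createfn. A buffer containing nothing but
 * whitespace yields argc == 0 and is silently accepted (returns 0).
 */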
int trace_run_command(const char *buf, int (*createfn)(int, char **))
{
	char **argv;
	int argc, ret;

	argc = 0;
	ret = 0;
	argv = argv_split(GFP_KERNEL, buf, &argc);
	if (!argv)
		return -ENOMEM;

	if (argc)
		ret = createfn(argc, argv);

	argv_free(argv);

	return ret;
}

#define WRITE_BUFSIZE  4096

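/*
 * Parse a user-space write into individual commands. The input is
 * copied in WRITE_BUFSIZE chunks, split on newlines, stripped of '#'
 * comments, and each complete line is passed to trace_run_command().
 * A single line has to fit in one chunk: at most WRITE_BUFSIZE - 2
 * characters plus the trailing '\n' and '\0'.
 *
 * For illustration only (the accepted syntax is defined entirely by
 * the caller's createfn, e.g. the kprobe_events parser), a write of:
 *
 *	"p:my_probe do_sys_open  # hypothetical probe\n"
 *
 * would reach createfn as argv = { "p:my_probe", "do_sys_open" }.
 */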
ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(int, char **))
{
	char *kbuf, *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	while (done < count) {
		size = count - done;

		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size)) {
			ret = -EFAULT;
			goto out;
		}
		kbuf[size] = '\0';
		buf = kbuf;
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				size = tmp - buf + 1;
			} else {
				size = strlen(buf);
				if (done + size < count) {
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line is too long: should be less than %d characters\n",
						WRITE_BUFSIZE - 2);
					ret = -EINVAL;
					goto out;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');

			if (tmp)
				*tmp = '\0';

			ret = trace_run_command(buf, createfn);
			if (ret)
				goto out;
			buf += size;

		} while (done < count);
	}
	ret = done;

out:
	kfree(kbuf);

	return ret;
}

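/*
 * Allocate everything the global trace array needs at boot: the CPU
 * masks, the trace_printk buffers (when used), the saved_cmdlines
 * buffer, the ring buffer itself, and the nop tracer as the initial
 * current_trace. On failure, the out_* labels unwind the allocations
 * in reverse order.
 */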
__init static int tracer_alloc_buffers(void)
{
	int ring_buf_size;
	int ret = -ENOMEM;

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		goto out;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callback allocates memory for the per-CPU ring
	 * buffer. We don't free that buffer when a CPU goes down: if we
	 * did, the user would lose any trace that was in it. The memory
	 * is only removed once the "instance" itself is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	/* TODO: make the number of buffers hot pluggable with CPUs */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_savedcmd;
	}

	if (global_trace.buffer_disabled)
		tracing_off();

	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	ftrace_init_global_array_ops(&global_trace);

	init_trace_flags_index(&global_trace);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

out_free_savedcmd:
	free_saved_cmdlines_buffer(savedcmd);
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
out:
	return ret;
}

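/*
 * Called from start_kernel() early in boot, before trace_init(), so
 * that trace_printk() and boot-time function tracing are available as
 * early as possible. The rest of the setup (events, tracefs files)
 * happens in trace_init() and in the initcalls registered below.
 */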
void __init early_trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter =
			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
		if (MEM_FAIL(!tracepoint_print_iter,
			     "Failed to allocate trace iterator\n"))
			tracepoint_printk = 0;
		else
			static_key_enable(&tracepoint_printk_key.key);
	}
	tracer_alloc_buffers();

	init_events();
}

void __init trace_init(void)
{
	trace_event_init();
}

__init static int clear_boot_tracer(void)
{
	/*
	 * The buffer holding the default boot-up tracer name lives in an
	 * init section. This function runs at late_initcall time; if the
	 * boot tracer was never registered by then, clear the pointer so
	 * that a later tracer registration cannot access memory that is
	 * about to be freed.
	 */
	if (!default_bootup_tracer)
		return 0;

	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
	       default_bootup_tracer);
	default_bootup_tracer = NULL;

	return 0;
}

fs_initcall(tracer_init_tracefs);
late_initcall_sync(clear_boot_tracer);

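/*
 * The default "local" trace clock is fast but is not guaranteed to be
 * ordered across CPUs when sched_clock() is unstable. In that case,
 * fall back to the slower but globally ordered "global" clock so that
 * events from different CPUs still sort correctly in the output.
 */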
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
__init static int tracing_set_default_clock(void)
{
	/* sched_clock_stable() is determined in late_initcall */
	if (!trace_boot_clock && !sched_clock_stable()) {
		if (security_locked_down(LOCKDOWN_TRACEFS)) {
			pr_warn("Can not set tracing clock due to lockdown\n");
			return -EPERM;
		}

		printk(KERN_WARNING
		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
		       "If you want to keep using the local clock, then add:\n"
		       "  \"trace_clock=local\"\n"
		       "on the kernel command line\n");
		tracing_set_clock(&global_trace, "global");
	}

	return 0;
}
late_initcall_sync(tracing_set_default_clock);
#endif