1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/kmemleak.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 #include <trace/hooks/ftrace_dump.h>
53
54 #include "trace.h"
55 #include "trace_output.h"
56
57 /*
58 * On boot up, the ring buffer is set to the minimum size, so that
59 * we do not waste memory on systems that are not using tracing.
60 */
61 bool ring_buffer_expanded;
62
/*
 * We need to change this state when a selftest is running.
 * A selftest will look into the ring buffer to count the entries
 * inserted during the selftest, but concurrent insertions into the
 * ring buffer (such as trace_printk) could occur at the same time,
 * giving false positive or negative results.
 */
70 static bool __read_mostly tracing_selftest_running;
71
72 /*
73 * If boot-time tracing including tracers/events via kernel cmdline
74 * is running, we do not want to run SELFTEST.
75 */
76 bool __read_mostly tracing_selftest_disabled;
77
78 #ifdef CONFIG_FTRACE_STARTUP_TEST
void __init disable_tracing_selftest(const char *reason)
80 {
81 if (!tracing_selftest_disabled) {
82 tracing_selftest_disabled = true;
83 pr_info("Ftrace startup test is disabled due to %s\n", reason);
84 }
85 }
86 #endif
87
88 /* Pipe tracepoints to printk */
89 struct trace_iterator *tracepoint_print_iter;
90 int tracepoint_printk;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95 { }
96 };
97
98 static int
dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101 return 0;
102 }
103
104 /*
105 * To prevent the comm cache from being overwritten when no
106 * tracing is active, only save the comm when a trace event
107 * occurred.
108 */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112 * Kill all tracing for good (never come back).
113 * It is initialized to 1 but will turn to zero if the initialization
114 * of the tracer is successful. But that is the only place that sets
115 * this back to zero.
116 */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly tracing_buffer_mask;
120
121 /*
122 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123 *
124 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125 * is set, then ftrace_dump is called. This will output the contents
126 * of the ftrace buffers to the console. This is very useful for
 * capturing traces that lead to crashes and outputting them to a
128 * serial console.
129 *
 * It is off by default, but you can enable it either by specifying
 * "ftrace_dump_on_oops" on the kernel command line, or by setting
132 * /proc/sys/kernel/ftrace_dump_on_oops
133 * Set 1 if you want to dump buffers of all CPUs
134 * Set 2 if you want to dump the buffer of the CPU that triggered oops
135 */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145 struct module *mod;
146 unsigned long length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152 /*
153 * "end" is first and points to NULL as it must be different
154 * than "mod" or "eval_string"
155 */
156 union trace_eval_map_item *next;
157 const char *end; /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163 * The trace_eval_maps are saved in an array with two extra elements,
164 * one at the beginning, and one at the end. The beginning item contains
165 * the count of the saved maps (head.length), and the module they
166 * belong to if not built in (head.mod). The ending item contains a
167 * pointer to the next array of saved eval_map items.
168 */
169 union trace_eval_map_item {
170 struct trace_eval_map map;
171 struct trace_eval_map_head head;
172 struct trace_eval_map_tail tail;
173 };
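
/*
 * Illustrative layout (a sketch, not compiled code): if a module saved
 * three eval maps, the array described above would look roughly like
 *
 *	ptr[0].head = { .mod = the_module, .length = 3 };
 *	ptr[1].map  = first saved trace_eval_map
 *	ptr[2].map  = second saved trace_eval_map
 *	ptr[3].map  = third saved trace_eval_map
 *	ptr[4].tail = { .next = next saved array, or NULL };
 */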
174
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180 struct trace_buffer *buffer,
181 unsigned long flags, int pc);
182
183 #define MAX_TRACER_SIZE 100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188
static int __init set_cmdline_ftrace(char *str)
190 {
191 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192 default_bootup_tracer = bootup_tracer_buf;
193 /* We are using ftrace early, expand it */
194 ring_buffer_expanded = true;
195 return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198
static int __init set_ftrace_dump_on_oops(char *str)
200 {
201 if (*str++ != '=' || !*str) {
202 ftrace_dump_on_oops = DUMP_ALL;
203 return 1;
204 }
205
206 if (!strcmp("orig_cpu", str)) {
207 ftrace_dump_on_oops = DUMP_ORIG;
208 return 1;
209 }
210
211 return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214
static int __init stop_trace_on_warning(char *str)
216 {
217 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218 __disable_trace_on_warning = 1;
219 return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222
static int __init boot_alloc_snapshot(char *str)
224 {
225 allocate_snapshot = true;
226 /* We also need the main ring buffer expanded */
227 ring_buffer_expanded = true;
228 return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231
232
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234
static int __init set_trace_boot_options(char *str)
236 {
237 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238 return 1;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244
static int __init set_trace_boot_clock(char *str)
246 {
247 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248 trace_boot_clock = trace_boot_clock_buf;
249 return 1;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252
static int __init set_tracepoint_printk(char *str)
254 {
255 /* Ignore the "tp_printk_stop_on_boot" param */
256 if (*str == '_')
257 return 0;
258
259 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
260 tracepoint_printk = 1;
261 return 1;
262 }
263 __setup("tp_printk", set_tracepoint_printk);
264
unsigned long long ns2usecs(u64 nsec)
266 {
267 nsec += 500;
268 do_div(nsec, 1000);
269 return nsec;
270 }
271
272 static void
trace_process_export(struct trace_export *export,
274 struct ring_buffer_event *event, int flag)
275 {
276 struct trace_entry *entry;
277 unsigned int size = 0;
278
279 if (export->flags & flag) {
280 entry = ring_buffer_event_data(event);
281 size = ring_buffer_event_length(event);
282 export->write(export, entry, size);
283 }
284 }
285
286 static DEFINE_MUTEX(ftrace_export_lock);
287
288 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
289
290 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
291 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
292 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
293
static inline void ftrace_exports_enable(struct trace_export *export)
295 {
296 if (export->flags & TRACE_EXPORT_FUNCTION)
297 static_branch_inc(&trace_function_exports_enabled);
298
299 if (export->flags & TRACE_EXPORT_EVENT)
300 static_branch_inc(&trace_event_exports_enabled);
301
302 if (export->flags & TRACE_EXPORT_MARKER)
303 static_branch_inc(&trace_marker_exports_enabled);
304 }
305
static inline void ftrace_exports_disable(struct trace_export *export)
307 {
308 if (export->flags & TRACE_EXPORT_FUNCTION)
309 static_branch_dec(&trace_function_exports_enabled);
310
311 if (export->flags & TRACE_EXPORT_EVENT)
312 static_branch_dec(&trace_event_exports_enabled);
313
314 if (export->flags & TRACE_EXPORT_MARKER)
315 static_branch_dec(&trace_marker_exports_enabled);
316 }
317
static void ftrace_exports(struct ring_buffer_event *event, int flag)
319 {
320 struct trace_export *export;
321
322 preempt_disable_notrace();
323
324 export = rcu_dereference_raw_check(ftrace_exports_list);
325 while (export) {
326 trace_process_export(export, event, flag);
327 export = rcu_dereference_raw_check(export->next);
328 }
329
330 preempt_enable_notrace();
331 }
332
333 static inline void
add_trace_export(struct trace_export **list, struct trace_export *export)
335 {
336 rcu_assign_pointer(export->next, *list);
337 /*
338 * We are entering export into the list but another
339 * CPU might be walking that list. We need to make sure
340 * the export->next pointer is valid before another CPU sees
341 * the export pointer included into the list.
342 */
343 rcu_assign_pointer(*list, export);
344 }
345
346 static inline int
rm_trace_export(struct trace_export **list, struct trace_export *export)
348 {
349 struct trace_export **p;
350
351 for (p = list; *p != NULL; p = &(*p)->next)
352 if (*p == export)
353 break;
354
355 if (*p != export)
356 return -1;
357
358 rcu_assign_pointer(*p, (*p)->next);
359
360 return 0;
361 }
362
363 static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
365 {
366 ftrace_exports_enable(export);
367
368 add_trace_export(list, export);
369 }
370
371 static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
373 {
374 int ret;
375
376 ret = rm_trace_export(list, export);
377 ftrace_exports_disable(export);
378
379 return ret;
380 }
381
int register_ftrace_export(struct trace_export *export)
383 {
384 if (WARN_ON_ONCE(!export->write))
385 return -1;
386
387 mutex_lock(&ftrace_export_lock);
388
389 add_ftrace_export(&ftrace_exports_list, export);
390
391 mutex_unlock(&ftrace_export_lock);
392
393 return 0;
394 }
395 EXPORT_SYMBOL_GPL(register_ftrace_export);
396
int unregister_ftrace_export(struct trace_export *export)
398 {
399 int ret;
400
401 mutex_lock(&ftrace_export_lock);
402
403 ret = rm_ftrace_export(&ftrace_exports_list, export);
404
405 mutex_unlock(&ftrace_export_lock);
406
407 return ret;
408 }
409 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
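
/*
 * Usage sketch (illustrative only; my_write() and my_export are made-up
 * names, not part of this file): a module that wants raw function trace
 * events pushed to it registers a struct trace_export whose ->write()
 * callback receives the binary trace entry and its length.
 *
 *	static void my_write(struct trace_export *export,
 *			     const void *entry, unsigned int len)
 *	{
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_write,
 *		.flags	= TRACE_EXPORT_FUNCTION,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */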
410
411 /* trace_flags holds trace_options default values */
412 #define TRACE_DEFAULT_FLAGS \
413 (FUNCTION_DEFAULT_FLAGS | \
414 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
415 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
416 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
417 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
418
419 /* trace_options that are only supported by global_trace */
420 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
421 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
422
423 /* trace_flags that are default zero for instances */
424 #define ZEROED_TRACE_FLAGS \
425 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
426
427 /*
428 * The global_trace is the descriptor that holds the top-level tracing
429 * buffers for the live tracing.
430 */
431 static struct trace_array global_trace = {
432 .trace_flags = TRACE_DEFAULT_FLAGS,
433 };
434
435 LIST_HEAD(ftrace_trace_arrays);
436
int trace_array_get(struct trace_array *this_tr)
438 {
439 struct trace_array *tr;
440 int ret = -ENODEV;
441
442 mutex_lock(&trace_types_lock);
443 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
444 if (tr == this_tr) {
445 tr->ref++;
446 ret = 0;
447 break;
448 }
449 }
450 mutex_unlock(&trace_types_lock);
451
452 return ret;
453 }
454
static void __trace_array_put(struct trace_array *this_tr)
456 {
457 WARN_ON(!this_tr->ref);
458 this_tr->ref--;
459 }
460
461 /**
462 * trace_array_put - Decrement the reference counter for this trace array.
463 *
464 * NOTE: Use this when we no longer need the trace array returned by
465 * trace_array_get_by_name(). This ensures the trace array can be later
466 * destroyed.
467 *
468 */
void trace_array_put(struct trace_array *this_tr)
470 {
471 if (!this_tr)
472 return;
473
474 mutex_lock(&trace_types_lock);
475 __trace_array_put(this_tr);
476 mutex_unlock(&trace_types_lock);
477 }
478 EXPORT_SYMBOL_GPL(trace_array_put);
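
/*
 * Typical pairing (a sketch; "foo" is just an example instance name):
 * the reference taken by trace_array_get_by_name() is dropped with
 * trace_array_put() once the caller is done with the instance.
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("foo");
 *	if (tr) {
 *		trace_array_printk(tr, _THIS_IP_, "hello\n");
 *		trace_array_put(tr);
 *	}
 */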
479
int tracing_check_open_get_tr(struct trace_array *tr)
481 {
482 int ret;
483
484 ret = security_locked_down(LOCKDOWN_TRACEFS);
485 if (ret)
486 return ret;
487
488 if (tracing_disabled)
489 return -ENODEV;
490
491 if (tr && trace_array_get(tr) < 0)
492 return -ENODEV;
493
494 return 0;
495 }
496
int call_filter_check_discard(struct trace_event_call *call, void *rec,
498 struct trace_buffer *buffer,
499 struct ring_buffer_event *event)
500 {
501 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
502 !filter_match_preds(call->filter, rec)) {
503 __trace_event_discard_commit(buffer, event);
504 return 1;
505 }
506
507 return 0;
508 }
509
void trace_free_pid_list(struct trace_pid_list *pid_list)
511 {
512 vfree(pid_list->pids);
513 kfree(pid_list);
514 }
515
516 /**
517 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
518 * @filtered_pids: The list of pids to check
519 * @search_pid: The PID to find in @filtered_pids
520 *
 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
522 */
523 bool
trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
525 {
526 /*
527 * If pid_max changed after filtered_pids was created, we
528 * by default ignore all pids greater than the previous pid_max.
529 */
530 if (search_pid >= filtered_pids->pid_max)
531 return false;
532
533 return test_bit(search_pid, filtered_pids->pids);
534 }
535
536 /**
537 * trace_ignore_this_task - should a task be ignored for tracing
538 * @filtered_pids: The list of pids to check
539 * @task: The task that should be ignored if not filtered
540 *
541 * Checks if @task should be traced or not from @filtered_pids.
542 * Returns true if @task should *NOT* be traced.
543 * Returns false if @task should be traced.
544 */
545 bool
trace_ignore_this_task(struct trace_pid_list *filtered_pids,
547 struct trace_pid_list *filtered_no_pids,
548 struct task_struct *task)
549 {
550 /*
 * If filtered_no_pids is not empty, and the task's pid is listed
552 * in filtered_no_pids, then return true.
553 * Otherwise, if filtered_pids is empty, that means we can
554 * trace all tasks. If it has content, then only trace pids
555 * within filtered_pids.
556 */
557
558 return (filtered_pids &&
559 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
560 (filtered_no_pids &&
561 trace_find_filtered_pid(filtered_no_pids, task->pid));
562 }
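
/*
 * Worked example (illustrative): with filtered_pids = { 1, 2 } and
 * filtered_no_pids == NULL, a task with pid 3 is ignored (returns true)
 * while a task with pid 2 is traced (returns false). With filtered_pids
 * == NULL and filtered_no_pids = { 7 }, only pid 7 is ignored.
 */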
563
564 /**
565 * trace_filter_add_remove_task - Add or remove a task from a pid_list
566 * @pid_list: The list to modify
567 * @self: The current task for fork or NULL for exit
568 * @task: The task to add or remove
569 *
570 * If adding a task, if @self is defined, the task is only added if @self
571 * is also included in @pid_list. This happens on fork and tasks should
572 * only be added when the parent is listed. If @self is NULL, then the
573 * @task pid will be removed from the list, which would happen on exit
574 * of a task.
575 */
void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
577 struct task_struct *self,
578 struct task_struct *task)
579 {
580 if (!pid_list)
581 return;
582
583 /* For forks, we only add if the forking task is listed */
584 if (self) {
585 if (!trace_find_filtered_pid(pid_list, self->pid))
586 return;
587 }
588
589 /* Sorry, but we don't support pid_max changing after setting */
590 if (task->pid >= pid_list->pid_max)
591 return;
592
593 /* "self" is set for forks, and NULL for exits */
594 if (self)
595 set_bit(task->pid, pid_list->pids);
596 else
597 clear_bit(task->pid, pid_list->pids);
598 }
599
600 /**
601 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
602 * @pid_list: The pid list to show
603 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
604 * @pos: The position of the file
605 *
606 * This is used by the seq_file "next" operation to iterate the pids
607 * listed in a trace_pid_list structure.
608 *
609 * Returns the pid+1 as we want to display pid of zero, but NULL would
610 * stop the iteration.
611 */
void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
613 {
614 unsigned long pid = (unsigned long)v;
615
616 (*pos)++;
617
	/* pid already is +1 of the actual previous bit */
619 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
620
621 /* Return pid + 1 to allow zero to be represented */
622 if (pid < pid_list->pid_max)
623 return (void *)(pid + 1);
624
625 return NULL;
626 }
627
628 /**
629 * trace_pid_start - Used for seq_file to start reading pid lists
630 * @pid_list: The pid list to show
631 * @pos: The position of the file
632 *
633 * This is used by seq_file "start" operation to start the iteration
634 * of listing pids.
635 *
636 * Returns the pid+1 as we want to display pid of zero, but NULL would
637 * stop the iteration.
638 */
void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
640 {
641 unsigned long pid;
642 loff_t l = 0;
643
644 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
645 if (pid >= pid_list->pid_max)
646 return NULL;
647
648 /* Return pid + 1 so that zero can be the exit value */
649 for (pid++; pid && l < *pos;
650 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
651 ;
652 return (void *)pid;
653 }
654
655 /**
656 * trace_pid_show - show the current pid in seq_file processing
657 * @m: The seq_file structure to write into
658 * @v: A void pointer of the pid (+1) value to display
659 *
660 * Can be directly used by seq_file operations to display the current
661 * pid value.
662 */
int trace_pid_show(struct seq_file *m, void *v)
664 {
665 unsigned long pid = (unsigned long)v - 1;
666
667 seq_printf(m, "%lu\n", pid);
668 return 0;
669 }
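
/*
 * Sketch of how the three helpers above are meant to be wired into a
 * seq_file (p_start, p_next, p_stop and my_pid_list are hypothetical
 * names; real users also take whatever locking protects the list, which
 * is omitted here):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static void p_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */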
670
671 /* 128 should be much more than enough */
672 #define PID_BUF_SIZE 127
673
int trace_pid_write(struct trace_pid_list *filtered_pids,
675 struct trace_pid_list **new_pid_list,
676 const char __user *ubuf, size_t cnt)
677 {
678 struct trace_pid_list *pid_list;
679 struct trace_parser parser;
680 unsigned long val;
681 int nr_pids = 0;
682 ssize_t read = 0;
683 ssize_t ret = 0;
684 loff_t pos;
685 pid_t pid;
686
687 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
688 return -ENOMEM;
689
690 /*
691 * Always recreate a new array. The write is an all or nothing
692 * operation. Always create a new array when adding new pids by
693 * the user. If the operation fails, then the current list is
694 * not modified.
695 */
696 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
697 if (!pid_list) {
698 trace_parser_put(&parser);
699 return -ENOMEM;
700 }
701
702 pid_list->pid_max = READ_ONCE(pid_max);
703
704 /* Only truncating will shrink pid_max */
705 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
706 pid_list->pid_max = filtered_pids->pid_max;
707
708 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
709 if (!pid_list->pids) {
710 trace_parser_put(&parser);
711 kfree(pid_list);
712 return -ENOMEM;
713 }
714
715 if (filtered_pids) {
716 /* copy the current bits to the new max */
717 for_each_set_bit(pid, filtered_pids->pids,
718 filtered_pids->pid_max) {
719 set_bit(pid, pid_list->pids);
720 nr_pids++;
721 }
722 }
723
724 while (cnt > 0) {
725
726 pos = 0;
727
728 ret = trace_get_user(&parser, ubuf, cnt, &pos);
729 if (ret < 0 || !trace_parser_loaded(&parser))
730 break;
731
732 read += ret;
733 ubuf += ret;
734 cnt -= ret;
735
736 ret = -EINVAL;
737 if (kstrtoul(parser.buffer, 0, &val))
738 break;
739 if (val >= pid_list->pid_max)
740 break;
741
742 pid = (pid_t)val;
743
744 set_bit(pid, pid_list->pids);
745 nr_pids++;
746
747 trace_parser_clear(&parser);
748 ret = 0;
749 }
750 trace_parser_put(&parser);
751
752 if (ret < 0) {
753 trace_free_pid_list(pid_list);
754 return ret;
755 }
756
757 if (!nr_pids) {
758 /* Cleared the list of pids */
759 trace_free_pid_list(pid_list);
760 read = ret;
761 pid_list = NULL;
762 }
763
764 *new_pid_list = pid_list;
765
766 return read;
767 }
768
static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
770 {
771 u64 ts;
772
773 /* Early boot up does not have a buffer yet */
774 if (!buf->buffer)
775 return trace_clock_local();
776
777 ts = ring_buffer_time_stamp(buf->buffer, cpu);
778 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
779
780 return ts;
781 }
782
u64 ftrace_now(int cpu)
784 {
785 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
786 }
787
788 /**
789 * tracing_is_enabled - Show if global_trace has been disabled
790 *
791 * Shows if the global trace has been enabled or not. It uses the
792 * mirror flag "buffer_disabled" to be used in fast paths such as for
793 * the irqsoff tracer. But it may be inaccurate due to races. If you
794 * need to know the accurate state, use tracing_is_on() which is a little
795 * slower, but accurate.
796 */
int tracing_is_enabled(void)
798 {
799 /*
800 * For quick access (irqsoff uses this in fast path), just
801 * return the mirror variable of the state of the ring buffer.
802 * It's a little racy, but we don't really care.
803 */
804 smp_rmb();
805 return !global_trace.buffer_disabled;
806 }
807
808 /*
809 * trace_buf_size is the size in bytes that is allocated
810 * for a buffer. Note, the number of bytes is always rounded
811 * to page size.
812 *
813 * This number is purposely set to a low number of 16384.
814 * If the dump on oops happens, it will be much appreciated
 * to not have to wait for all that output. Anyway, this is
 * configurable at both boot time and run time.
817 */
818 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
819
820 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
821
822 /* trace_types holds a link list of available tracers. */
823 static struct tracer *trace_types __read_mostly;
824
825 /*
826 * trace_types_lock is used to protect the trace_types list.
827 */
828 DEFINE_MUTEX(trace_types_lock);
829
/*
 * Serialize access to the ring buffer.
 *
 * The ring buffer serializes readers, but that is only low-level
 * protection. The validity of the events (returned by ring_buffer_peek()
 * and friends) is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow another process to
 * consume these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *      (not a reader page) in the ring buffer, and this page will be
 *      rewritten by the event producer.
 *   B) the page of the consumed events may become a page for splice_read,
 *      and this page will be returned to the system.
 *
 * These primitives allow multiple processes to access different cpu ring
 * buffers concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multiple read-only accesses are also serialized.
 */
851
852 #ifdef CONFIG_SMP
853 static DECLARE_RWSEM(all_cpu_access_lock);
854 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
855
static inline void trace_access_lock(int cpu)
857 {
858 if (cpu == RING_BUFFER_ALL_CPUS) {
859 /* gain it for accessing the whole ring buffer. */
860 down_write(&all_cpu_access_lock);
861 } else {
862 /* gain it for accessing a cpu ring buffer. */
863
864 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
865 down_read(&all_cpu_access_lock);
866
867 /* Secondly block other access to this @cpu ring buffer. */
868 mutex_lock(&per_cpu(cpu_access_lock, cpu));
869 }
870 }
871
static inline void trace_access_unlock(int cpu)
873 {
874 if (cpu == RING_BUFFER_ALL_CPUS) {
875 up_write(&all_cpu_access_lock);
876 } else {
877 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
878 up_read(&all_cpu_access_lock);
879 }
880 }
881
static inline void trace_access_lock_init(void)
883 {
884 int cpu;
885
886 for_each_possible_cpu(cpu)
887 mutex_init(&per_cpu(cpu_access_lock, cpu));
888 }
889
890 #else
891
892 static DEFINE_MUTEX(access_lock);
893
static inline void trace_access_lock(int cpu)
895 {
896 (void)cpu;
897 mutex_lock(&access_lock);
898 }
899
static inline void trace_access_unlock(int cpu)
901 {
902 (void)cpu;
903 mutex_unlock(&access_lock);
904 }
905
static inline void trace_access_lock_init(void)
907 {
908 }
909
910 #endif
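
/*
 * Typical use of the serialization above (a sketch): a reader of a
 * single cpu buffer brackets its consume with the per-cpu lock, while a
 * reader that touches every cpu passes RING_BUFFER_ALL_CPUS to take the
 * access lock exclusively.
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	trace_access_unlock(cpu);
 */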
911
912 #ifdef CONFIG_STACKTRACE
913 static void __ftrace_trace_stack(struct trace_buffer *buffer,
914 unsigned long flags,
915 int skip, int pc, struct pt_regs *regs);
916 static inline void ftrace_trace_stack(struct trace_array *tr,
917 struct trace_buffer *buffer,
918 unsigned long flags,
919 int skip, int pc, struct pt_regs *regs);
920
921 #else
static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
923 unsigned long flags,
924 int skip, int pc, struct pt_regs *regs)
925 {
926 }
static inline void ftrace_trace_stack(struct trace_array *tr,
928 struct trace_buffer *buffer,
929 unsigned long flags,
930 int skip, int pc, struct pt_regs *regs)
931 {
932 }
933
934 #endif
935
936 static __always_inline void
trace_event_setup(struct ring_buffer_event *event,
938 int type, unsigned long flags, int pc)
939 {
940 struct trace_entry *ent = ring_buffer_event_data(event);
941
942 tracing_generic_entry_update(ent, type, flags, pc);
943 }
944
945 static __always_inline struct ring_buffer_event *
__trace_buffer_lock_reserve(struct trace_buffer *buffer,
947 int type,
948 unsigned long len,
949 unsigned long flags, int pc)
950 {
951 struct ring_buffer_event *event;
952
953 event = ring_buffer_lock_reserve(buffer, len);
954 if (event != NULL)
955 trace_event_setup(event, type, flags, pc);
956
957 return event;
958 }
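
/*
 * This helper is the "reserve" half of the reserve/commit pattern used
 * throughout this file; a writer fills in the returned event and then
 * commits it, roughly (sketch only, error handling elided):
 *
 *	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, len,
 *					    irq_flags, pc);
 *	if (event) {
 *		entry = ring_buffer_event_data(event);
 *		... fill in *entry ...
 *		__buffer_unlock_commit(buffer, event);
 *	}
 */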
959
void tracer_tracing_on(struct trace_array *tr)
961 {
962 if (tr->array_buffer.buffer)
963 ring_buffer_record_on(tr->array_buffer.buffer);
964 /*
965 * This flag is looked at when buffers haven't been allocated
966 * yet, or by some tracers (like irqsoff), that just want to
967 * know if the ring buffer has been disabled, but it can handle
968 * races of where it gets disabled but we still do a record.
969 * As the check is in the fast path of the tracers, it is more
970 * important to be fast than accurate.
971 */
972 tr->buffer_disabled = 0;
973 /* Make the flag seen by readers */
974 smp_wmb();
975 }
976
977 /**
978 * tracing_on - enable tracing buffers
979 *
980 * This function enables tracing buffers that may have been
981 * disabled with tracing_off.
982 */
void tracing_on(void)
984 {
985 tracer_tracing_on(&global_trace);
986 }
987 EXPORT_SYMBOL_GPL(tracing_on);
988
989
990 static __always_inline void
__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
992 {
993 __this_cpu_write(trace_taskinfo_save, true);
994
995 /* If this is the temp buffer, we need to commit fully */
996 if (this_cpu_read(trace_buffered_event) == event) {
997 /* Length is in event->array[0] */
998 ring_buffer_write(buffer, event->array[0], &event->array[1]);
999 /* Release the temp buffer */
1000 this_cpu_dec(trace_buffered_event_cnt);
1001 } else
1002 ring_buffer_unlock_commit(buffer, event);
1003 }
1004
1005 /**
1006 * __trace_puts - write a constant string into the trace buffer.
1007 * @ip: The address of the caller
1008 * @str: The constant string to write
1009 * @size: The size of the string.
1010 */
int __trace_puts(unsigned long ip, const char *str, int size)
1012 {
1013 struct ring_buffer_event *event;
1014 struct trace_buffer *buffer;
1015 struct print_entry *entry;
1016 unsigned long irq_flags;
1017 int alloc;
1018 int pc;
1019
1020 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1021 return 0;
1022
1023 pc = preempt_count();
1024
1025 if (unlikely(tracing_selftest_running || tracing_disabled))
1026 return 0;
1027
1028 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1029
1030 local_save_flags(irq_flags);
1031 buffer = global_trace.array_buffer.buffer;
1032 ring_buffer_nest_start(buffer);
1033 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1034 irq_flags, pc);
1035 if (!event) {
1036 size = 0;
1037 goto out;
1038 }
1039
1040 entry = ring_buffer_event_data(event);
1041 entry->ip = ip;
1042
1043 memcpy(&entry->buf, str, size);
1044
1045 /* Add a newline if necessary */
1046 if (entry->buf[size - 1] != '\n') {
1047 entry->buf[size] = '\n';
1048 entry->buf[size + 1] = '\0';
1049 } else
1050 entry->buf[size] = '\0';
1051
1052 __buffer_unlock_commit(buffer, event);
1053 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1054 out:
1055 ring_buffer_nest_end(buffer);
1056 return size;
1057 }
1058 EXPORT_SYMBOL_GPL(__trace_puts);
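
/*
 * Callers normally reach this through the trace_puts() macro rather
 * than calling __trace_puts() directly; the macro supplies _THIS_IP_
 * and the string length (and typically routes constant strings to
 * __trace_bputs() instead). A quick debugging sketch:
 *
 *	trace_puts("hit the slow path\n");
 */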
1059
1060 /**
1061 * __trace_bputs - write the pointer to a constant string into trace buffer
1062 * @ip: The address of the caller
 * @str: The constant string to write to the buffer (only its address is stored)
1064 */
int __trace_bputs(unsigned long ip, const char *str)
1066 {
1067 struct ring_buffer_event *event;
1068 struct trace_buffer *buffer;
1069 struct bputs_entry *entry;
1070 unsigned long irq_flags;
1071 int size = sizeof(struct bputs_entry);
1072 int ret = 0;
1073 int pc;
1074
1075 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1076 return 0;
1077
1078 pc = preempt_count();
1079
1080 if (unlikely(tracing_selftest_running || tracing_disabled))
1081 return 0;
1082
1083 local_save_flags(irq_flags);
1084 buffer = global_trace.array_buffer.buffer;
1085
1086 ring_buffer_nest_start(buffer);
1087 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1088 irq_flags, pc);
1089 if (!event)
1090 goto out;
1091
1092 entry = ring_buffer_event_data(event);
1093 entry->ip = ip;
1094 entry->str = str;
1095
1096 __buffer_unlock_commit(buffer, event);
1097 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1098
1099 ret = 1;
1100 out:
1101 ring_buffer_nest_end(buffer);
1102 return ret;
1103 }
1104 EXPORT_SYMBOL_GPL(__trace_bputs);
1105
1106 #ifdef CONFIG_TRACER_SNAPSHOT
static void tracing_snapshot_instance_cond(struct trace_array *tr,
1108 void *cond_data)
1109 {
1110 struct tracer *tracer = tr->current_trace;
1111 unsigned long flags;
1112
1113 if (in_nmi()) {
1114 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1115 internal_trace_puts("*** snapshot is being ignored ***\n");
1116 return;
1117 }
1118
1119 if (!tr->allocated_snapshot) {
1120 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1121 internal_trace_puts("*** stopping trace here! ***\n");
1122 tracing_off();
1123 return;
1124 }
1125
1126 /* Note, snapshot can not be used when the tracer uses it */
1127 if (tracer->use_max_tr) {
1128 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1129 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1130 return;
1131 }
1132
1133 local_irq_save(flags);
1134 update_max_tr(tr, current, smp_processor_id(), cond_data);
1135 local_irq_restore(flags);
1136 }
1137
void tracing_snapshot_instance(struct trace_array *tr)
1139 {
1140 tracing_snapshot_instance_cond(tr, NULL);
1141 }
1142
1143 /**
1144 * tracing_snapshot - take a snapshot of the current buffer.
1145 *
1146 * This causes a swap between the snapshot buffer and the current live
1147 * tracing buffer. You can use this to take snapshots of the live
1148 * trace when some condition is triggered, but continue to trace.
1149 *
1150 * Note, make sure to allocate the snapshot with either
1151 * a tracing_snapshot_alloc(), or by doing it manually
1152 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1153 *
1154 * If the snapshot buffer is not allocated, it will stop tracing.
1155 * Basically making a permanent snapshot.
1156 */
void tracing_snapshot(void)
1158 {
1159 struct trace_array *tr = &global_trace;
1160
1161 tracing_snapshot_instance(tr);
1162 }
1163 EXPORT_SYMBOL_GPL(tracing_snapshot);
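
/*
 * Minimal usage sketch: allocate the spare buffer once from a context
 * that may sleep, then trigger the swap from the point of interest
 * (saw_problem is a hypothetical condition).
 *
 *	tracing_alloc_snapshot();
 *	...
 *	if (saw_problem)
 *		tracing_snapshot();
 */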
1164
1165 /**
1166 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1167 * @tr: The tracing instance to snapshot
1168 * @cond_data: The data to be tested conditionally, and possibly saved
1169 *
1170 * This is the same as tracing_snapshot() except that the snapshot is
1171 * conditional - the snapshot will only happen if the
1172 * cond_snapshot.update() implementation receiving the cond_data
1173 * returns true, which means that the trace array's cond_snapshot
1174 * update() operation used the cond_data to determine whether the
1175 * snapshot should be taken, and if it was, presumably saved it along
1176 * with the snapshot.
1177 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1179 {
1180 tracing_snapshot_instance_cond(tr, cond_data);
1181 }
1182 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1183
1184 /**
1185 * tracing_snapshot_cond_data - get the user data associated with a snapshot
1186 * @tr: The tracing instance
1187 *
1188 * When the user enables a conditional snapshot using
1189 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1190 * with the snapshot. This accessor is used to retrieve it.
1191 *
1192 * Should not be called from cond_snapshot.update(), since it takes
1193 * the tr->max_lock lock, which the code calling
1194 * cond_snapshot.update() has already done.
1195 *
1196 * Returns the cond_data associated with the trace array's snapshot.
1197 */
void *tracing_cond_snapshot_data(struct trace_array *tr)
1199 {
1200 void *cond_data = NULL;
1201
1202 local_irq_disable();
1203 arch_spin_lock(&tr->max_lock);
1204
1205 if (tr->cond_snapshot)
1206 cond_data = tr->cond_snapshot->cond_data;
1207
1208 arch_spin_unlock(&tr->max_lock);
1209 local_irq_enable();
1210
1211 return cond_data;
1212 }
1213 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1214
1215 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1216 struct array_buffer *size_buf, int cpu_id);
1217 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1218
int tracing_alloc_snapshot_instance(struct trace_array *tr)
1220 {
1221 int ret;
1222
1223 if (!tr->allocated_snapshot) {
1224
1225 /* allocate spare buffer */
1226 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1227 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1228 if (ret < 0)
1229 return ret;
1230
1231 tr->allocated_snapshot = true;
1232 }
1233
1234 return 0;
1235 }
1236
static void free_snapshot(struct trace_array *tr)
1238 {
1239 /*
	 * We don't free the ring buffer; instead, we resize it because the
	 * max_tr ring buffer has some state (e.g. ring->clock) and we want
	 * to preserve it.
1243 */
1244 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1245 set_buffer_entries(&tr->max_buffer, 1);
1246 tracing_reset_online_cpus(&tr->max_buffer);
1247 tr->allocated_snapshot = false;
1248 }
1249
1250 /**
1251 * tracing_alloc_snapshot - allocate snapshot buffer.
1252 *
1253 * This only allocates the snapshot buffer if it isn't already
1254 * allocated - it doesn't also take a snapshot.
1255 *
1256 * This is meant to be used in cases where the snapshot buffer needs
1257 * to be set up for events that can't sleep but need to be able to
1258 * trigger a snapshot.
1259 */
int tracing_alloc_snapshot(void)
1261 {
1262 struct trace_array *tr = &global_trace;
1263 int ret;
1264
1265 ret = tracing_alloc_snapshot_instance(tr);
1266 WARN_ON(ret < 0);
1267
1268 return ret;
1269 }
1270 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1271
1272 /**
1273 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1274 *
1275 * This is similar to tracing_snapshot(), but it will allocate the
1276 * snapshot buffer if it isn't already allocated. Use this only
1277 * where it is safe to sleep, as the allocation may sleep.
1278 *
1279 * This causes a swap between the snapshot buffer and the current live
1280 * tracing buffer. You can use this to take snapshots of the live
1281 * trace when some condition is triggered, but continue to trace.
1282 */
void tracing_snapshot_alloc(void)
1284 {
1285 int ret;
1286
1287 ret = tracing_alloc_snapshot();
1288 if (ret < 0)
1289 return;
1290
1291 tracing_snapshot();
1292 }
1293 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1294
1295 /**
1296 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1297 * @tr: The tracing instance
1298 * @cond_data: User data to associate with the snapshot
1299 * @update: Implementation of the cond_snapshot update function
1300 *
1301 * Check whether the conditional snapshot for the given instance has
1302 * already been enabled, or if the current tracer is already using a
1303 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1304 * save the cond_data and update function inside.
1305 *
1306 * Returns 0 if successful, error otherwise.
1307 */
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1309 cond_update_fn_t update)
1310 {
1311 struct cond_snapshot *cond_snapshot;
1312 int ret = 0;
1313
1314 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1315 if (!cond_snapshot)
1316 return -ENOMEM;
1317
1318 cond_snapshot->cond_data = cond_data;
1319 cond_snapshot->update = update;
1320
1321 mutex_lock(&trace_types_lock);
1322
1323 ret = tracing_alloc_snapshot_instance(tr);
1324 if (ret)
1325 goto fail_unlock;
1326
1327 if (tr->current_trace->use_max_tr) {
1328 ret = -EBUSY;
1329 goto fail_unlock;
1330 }
1331
1332 /*
1333 * The cond_snapshot can only change to NULL without the
1334 * trace_types_lock. We don't care if we race with it going
1335 * to NULL, but we want to make sure that it's not set to
1336 * something other than NULL when we get here, which we can
1337 * do safely with only holding the trace_types_lock and not
1338 * having to take the max_lock.
1339 */
1340 if (tr->cond_snapshot) {
1341 ret = -EBUSY;
1342 goto fail_unlock;
1343 }
1344
1345 local_irq_disable();
1346 arch_spin_lock(&tr->max_lock);
1347 tr->cond_snapshot = cond_snapshot;
1348 arch_spin_unlock(&tr->max_lock);
1349 local_irq_enable();
1350
1351 mutex_unlock(&trace_types_lock);
1352
1353 return ret;
1354
1355 fail_unlock:
1356 mutex_unlock(&trace_types_lock);
1357 kfree(cond_snapshot);
1358 return ret;
1359 }
1360 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
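
/*
 * Sketch of a conditional snapshot user (my_update(), my_data and the
 * seen_error field are hypothetical): the update callback runs under
 * tr->max_lock and decides whether the buffer swap should happen.
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_data *d = cond_data;
 *
 *		return d->seen_error;
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_data);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */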
1361
1362 /**
1363 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1364 * @tr: The tracing instance
1365 *
1366 * Check whether the conditional snapshot for the given instance is
1367 * enabled; if so, free the cond_snapshot associated with it,
1368 * otherwise return -EINVAL.
1369 *
1370 * Returns 0 if successful, error otherwise.
1371 */
int tracing_snapshot_cond_disable(struct trace_array *tr)
1373 {
1374 int ret = 0;
1375
1376 local_irq_disable();
1377 arch_spin_lock(&tr->max_lock);
1378
1379 if (!tr->cond_snapshot)
1380 ret = -EINVAL;
1381 else {
1382 kfree(tr->cond_snapshot);
1383 tr->cond_snapshot = NULL;
1384 }
1385
1386 arch_spin_unlock(&tr->max_lock);
1387 local_irq_enable();
1388
1389 return ret;
1390 }
1391 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1392 #else
void tracing_snapshot(void)
1394 {
1395 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1396 }
1397 EXPORT_SYMBOL_GPL(tracing_snapshot);
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1399 {
1400 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
int tracing_alloc_snapshot(void)
1404 {
1405 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1406 return -ENODEV;
1407 }
1408 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
void tracing_snapshot_alloc(void)
1410 {
1411 /* Give warning */
1412 tracing_snapshot();
1413 }
1414 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
void *tracing_cond_snapshot_data(struct trace_array *tr)
1416 {
1417 return NULL;
1418 }
1419 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1421 {
1422 return -ENODEV;
1423 }
1424 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
int tracing_snapshot_cond_disable(struct trace_array *tr)
1426 {
1427 return false;
1428 }
1429 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1430 #endif /* CONFIG_TRACER_SNAPSHOT */
1431
void tracer_tracing_off(struct trace_array *tr)
1433 {
1434 if (tr->array_buffer.buffer)
1435 ring_buffer_record_off(tr->array_buffer.buffer);
1436 /*
1437 * This flag is looked at when buffers haven't been allocated
1438 * yet, or by some tracers (like irqsoff), that just want to
1439 * know if the ring buffer has been disabled, but it can handle
1440 * races of where it gets disabled but we still do a record.
1441 * As the check is in the fast path of the tracers, it is more
1442 * important to be fast than accurate.
1443 */
1444 tr->buffer_disabled = 1;
1445 /* Make the flag seen by readers */
1446 smp_wmb();
1447 }
1448
1449 /**
1450 * tracing_off - turn off tracing buffers
1451 *
1452 * This function stops the tracing buffers from recording data.
1453 * It does not disable any overhead the tracers themselves may
1454 * be causing. This function simply causes all recording to
1455 * the ring buffers to fail.
1456 */
void tracing_off(void)
1458 {
1459 tracer_tracing_off(&global_trace);
1460 }
1461 EXPORT_SYMBOL_GPL(tracing_off);
1462
void disable_trace_on_warning(void)
1464 {
1465 if (__disable_trace_on_warning) {
1466 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1467 "Disabling tracing due to warning\n");
1468 tracing_off();
1469 }
1470 }
1471
1472 /**
1473 * tracer_tracing_is_on - show real state of ring buffer enabled
1474 * @tr : the trace array to know if ring buffer is enabled
1475 *
1476 * Shows real state of the ring buffer if it is enabled or not.
1477 */
bool tracer_tracing_is_on(struct trace_array *tr)
1479 {
1480 if (tr->array_buffer.buffer)
1481 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1482 return !tr->buffer_disabled;
1483 }
1484
1485 /**
1486 * tracing_is_on - show state of ring buffers enabled
1487 */
int tracing_is_on(void)
1489 {
1490 return tracer_tracing_is_on(&global_trace);
1491 }
1492 EXPORT_SYMBOL_GPL(tracing_is_on);
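
/*
 * Typical debugging pattern (a sketch; saw_corruption is hypothetical):
 * freeze the ring buffers as soon as a suspicious condition is seen so
 * the trace leading up to it is preserved for later inspection.
 *
 *	if (saw_corruption && tracing_is_on())
 *		tracing_off();
 */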
1493
static int __init set_buf_size(char *str)
1495 {
1496 unsigned long buf_size;
1497
1498 if (!str)
1499 return 0;
1500 buf_size = memparse(str, &str);
1501 /*
1502 * nr_entries can not be zero and the startup
1503 * tests require some buffer space. Therefore
1504 * ensure we have at least 4096 bytes of buffer.
1505 */
1506 trace_buf_size = max(4096UL, buf_size);
1507 return 1;
1508 }
1509 __setup("trace_buf_size=", set_buf_size);
1510
static int __init set_tracing_thresh(char *str)
1512 {
1513 unsigned long threshold;
1514 int ret;
1515
1516 if (!str)
1517 return 0;
1518 ret = kstrtoul(str, 0, &threshold);
1519 if (ret < 0)
1520 return 0;
1521 tracing_thresh = threshold * 1000;
1522 return 1;
1523 }
1524 __setup("tracing_thresh=", set_tracing_thresh);
1525
unsigned long nsecs_to_usecs(unsigned long nsecs)
1527 {
1528 return nsecs / 1000;
1529 }
1530
1531 /*
1532 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1533 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1534 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1535 * of strings in the order that the evals (enum) were defined.
1536 */
1537 #undef C
1538 #define C(a, b) b
1539
/* These must match the bit positions in trace_iterator_flags */
1541 static const char *trace_options[] = {
1542 TRACE_FLAGS
1543 NULL
1544 };
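
/*
 * Sketch of the expansion: an entry such as C(PRINT_PARENT, "print-parent")
 * in TRACE_FLAGS contributes the string "print-parent" to the array above,
 * while trace.h redefines C() to build the matching TRACE_ITER_PRINT_PARENT
 * bit, which keeps the option strings and the flag bits in sync.
 */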
1545
1546 static struct {
1547 u64 (*func)(void);
1548 const char *name;
1549 int in_ns; /* is this clock in nanoseconds? */
1550 } trace_clocks[] = {
1551 { trace_clock_local, "local", 1 },
1552 { trace_clock_global, "global", 1 },
1553 { trace_clock_counter, "counter", 0 },
1554 { trace_clock_jiffies, "uptime", 0 },
1555 { trace_clock, "perf", 1 },
1556 { ktime_get_mono_fast_ns, "mono", 1 },
1557 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1558 { ktime_get_boot_fast_ns, "boot", 1 },
1559 ARCH_TRACE_CLOCKS
1560 };
1561
bool trace_clock_in_ns(struct trace_array *tr)
1563 {
1564 if (trace_clocks[tr->clock_id].in_ns)
1565 return true;
1566
1567 return false;
1568 }
1569
1570 /*
1571 * trace_parser_get_init - gets the buffer for trace parser
1572 */
int trace_parser_get_init(struct trace_parser *parser, int size)
1574 {
1575 memset(parser, 0, sizeof(*parser));
1576
1577 parser->buffer = kmalloc(size, GFP_KERNEL);
1578 if (!parser->buffer)
1579 return 1;
1580
1581 parser->size = size;
1582 return 0;
1583 }
1584
1585 /*
1586 * trace_parser_put - frees the buffer for trace parser
1587 */
void trace_parser_put(struct trace_parser *parser)
1589 {
1590 kfree(parser->buffer);
1591 parser->buffer = NULL;
1592 }
1593
1594 /*
1595 * trace_get_user - reads the user input string separated by space
1596 * (matched by isspace(ch))
1597 *
1598 * For each string found the 'struct trace_parser' is updated,
1599 * and the function returns.
1600 *
1601 * Returns number of bytes read.
1602 *
1603 * See kernel/trace/trace.h for 'struct trace_parser' details.
1604 */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1606 size_t cnt, loff_t *ppos)
1607 {
1608 char ch;
1609 size_t read = 0;
1610 ssize_t ret;
1611
1612 if (!*ppos)
1613 trace_parser_clear(parser);
1614
1615 ret = get_user(ch, ubuf++);
1616 if (ret)
1617 goto out;
1618
1619 read++;
1620 cnt--;
1621
1622 /*
1623 * The parser is not finished with the last write,
1624 * continue reading the user input without skipping spaces.
1625 */
1626 if (!parser->cont) {
1627 /* skip white space */
1628 while (cnt && isspace(ch)) {
1629 ret = get_user(ch, ubuf++);
1630 if (ret)
1631 goto out;
1632 read++;
1633 cnt--;
1634 }
1635
1636 parser->idx = 0;
1637
1638 /* only spaces were written */
1639 if (isspace(ch) || !ch) {
1640 *ppos += read;
1641 ret = read;
1642 goto out;
1643 }
1644 }
1645
1646 /* read the non-space input */
1647 while (cnt && !isspace(ch) && ch) {
1648 if (parser->idx < parser->size - 1)
1649 parser->buffer[parser->idx++] = ch;
1650 else {
1651 ret = -EINVAL;
1652 goto out;
1653 }
1654 ret = get_user(ch, ubuf++);
1655 if (ret)
1656 goto out;
1657 read++;
1658 cnt--;
1659 }
1660
1661 /* We either got finished input or we have to wait for another call. */
1662 if (isspace(ch) || !ch) {
1663 parser->buffer[parser->idx] = 0;
1664 parser->cont = false;
1665 } else if (parser->idx < parser->size - 1) {
1666 parser->cont = true;
1667 parser->buffer[parser->idx++] = ch;
1668 /* Make sure the parsed string always terminates with '\0'. */
1669 parser->buffer[parser->idx] = 0;
1670 } else {
1671 ret = -EINVAL;
1672 goto out;
1673 }
1674
1675 *ppos += read;
1676 ret = read;
1677
1678 out:
1679 return ret;
1680 }
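
/*
 * Sketch of the usual calling pattern (see trace_pid_write() above for a
 * real user; MAX_LEN is a placeholder): pull whitespace-separated tokens
 * out of a user buffer one at a time.
 *
 *	if (trace_parser_get_init(&parser, MAX_LEN))
 *		return -ENOMEM;
 *
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		ubuf += ret;
 *		cnt -= ret;
 *		... use parser.buffer ...
 *		trace_parser_clear(&parser);
 *	}
 *	trace_parser_put(&parser);
 */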
1681
1682 /* TODO add a seq_buf_to_buffer() */
static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1684 {
1685 int len;
1686
1687 if (trace_seq_used(s) <= s->seq.readpos)
1688 return -EBUSY;
1689
1690 len = trace_seq_used(s) - s->seq.readpos;
1691 if (cnt > len)
1692 cnt = len;
1693 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1694
1695 s->seq.readpos += cnt;
1696 return cnt;
1697 }
1698
1699 unsigned long __read_mostly tracing_thresh;
1700 static const struct file_operations tracing_max_lat_fops;
1701
1702 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1703 defined(CONFIG_FSNOTIFY)
1704
1705 static struct workqueue_struct *fsnotify_wq;
1706
static void latency_fsnotify_workfn(struct work_struct *work)
1708 {
1709 struct trace_array *tr = container_of(work, struct trace_array,
1710 fsnotify_work);
1711 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1712 }
1713
static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1715 {
1716 struct trace_array *tr = container_of(iwork, struct trace_array,
1717 fsnotify_irqwork);
1718 queue_work(fsnotify_wq, &tr->fsnotify_work);
1719 }
1720
static void trace_create_maxlat_file(struct trace_array *tr,
1722 struct dentry *d_tracer)
1723 {
1724 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1725 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1726 tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1727 d_tracer, &tr->max_latency,
1728 &tracing_max_lat_fops);
1729 }
1730
__init static int latency_fsnotify_init(void)
1732 {
1733 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1734 WQ_UNBOUND | WQ_HIGHPRI, 0);
1735 if (!fsnotify_wq) {
1736 pr_err("Unable to allocate tr_max_lat_wq\n");
1737 return -ENOMEM;
1738 }
1739 return 0;
1740 }
1741
1742 late_initcall_sync(latency_fsnotify_init);
1743
void latency_fsnotify(struct trace_array *tr)
1745 {
1746 if (!fsnotify_wq)
1747 return;
1748 /*
1749 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1750 * possible that we are called from __schedule() or do_idle(), which
1751 * could cause a deadlock.
1752 */
1753 irq_work_queue(&tr->fsnotify_irqwork);
1754 }
1755
1756 /*
1757 * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1758 * defined(CONFIG_FSNOTIFY)
1759 */
1760 #else
1761
1762 #define trace_create_maxlat_file(tr, d_tracer) \
1763 trace_create_file("tracing_max_latency", 0644, d_tracer, \
1764 &tr->max_latency, &tracing_max_lat_fops)
1765
1766 #endif
1767
1768 #ifdef CONFIG_TRACER_MAX_TRACE
1769 /*
1770 * Copy the new maximum trace into the separate maximum-trace
1771 * structure. (this way the maximum trace is permanently saved,
1772 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1773 */
1774 static void
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1776 {
1777 struct array_buffer *trace_buf = &tr->array_buffer;
1778 struct array_buffer *max_buf = &tr->max_buffer;
1779 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1780 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1781
1782 max_buf->cpu = cpu;
1783 max_buf->time_start = data->preempt_timestamp;
1784
1785 max_data->saved_latency = tr->max_latency;
1786 max_data->critical_start = data->critical_start;
1787 max_data->critical_end = data->critical_end;
1788
1789 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1790 max_data->pid = tsk->pid;
1791 /*
1792 * If tsk == current, then use current_uid(), as that does not use
1793 * RCU. The irq tracer can be called out of RCU scope.
1794 */
1795 if (tsk == current)
1796 max_data->uid = current_uid();
1797 else
1798 max_data->uid = task_uid(tsk);
1799
1800 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1801 max_data->policy = tsk->policy;
1802 max_data->rt_priority = tsk->rt_priority;
1803
1804 /* record this task's comm */
1805 tracing_record_cmdline(tsk);
1806 latency_fsnotify(tr);
1807 }
1808
1809 /**
1810 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1811 * @tr: tracer
1812 * @tsk: the task with the latency
1813 * @cpu: The cpu that initiated the trace.
1814 * @cond_data: User data associated with a conditional snapshot
1815 *
1816 * Flip the buffers between the @tr and the max_tr and record information
1817 * about which task was the cause of this latency.
1818 */
1819 void
1820 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1821 void *cond_data)
1822 {
1823 if (tr->stop_count)
1824 return;
1825
1826 WARN_ON_ONCE(!irqs_disabled());
1827
1828 if (!tr->allocated_snapshot) {
1829 /* Only the nop tracer should hit this when disabling */
1830 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1831 return;
1832 }
1833
1834 arch_spin_lock(&tr->max_lock);
1835
1836 /* Inherit the recordable setting from array_buffer */
1837 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1838 ring_buffer_record_on(tr->max_buffer.buffer);
1839 else
1840 ring_buffer_record_off(tr->max_buffer.buffer);
1841
1842 #ifdef CONFIG_TRACER_SNAPSHOT
1843 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1844 goto out_unlock;
1845 #endif
1846 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1847
1848 __update_max_tr(tr, tsk, cpu);
1849
1850 out_unlock:
1851 arch_spin_unlock(&tr->max_lock);
1852 }
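
/*
 * Hedged usage sketch (not taken from this file): the latency tracers are
 * the expected callers of update_max_tr(). A caller is assumed to have
 * interrupts disabled and to have observed a new maximum before swapping,
 * roughly along these lines (delta and flags are hypothetical locals):
 *
 *   local_irq_save(flags);
 *   if (delta > tr->max_latency) {
 *           tr->max_latency = delta;
 *           update_max_tr(tr, current, smp_processor_id(), NULL);
 *   }
 *   local_irq_restore(flags);
 *
 * The WARN_ON_ONCE(!irqs_disabled()) above enforces that expectation.
 */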
1853
1854 /**
1855 * update_max_tr_single - only copy one trace over, and reset the rest
1856 * @tr: tracer
1857 * @tsk: task with the latency
1858 * @cpu: the cpu of the buffer to copy.
1859 *
1860 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1861 */
1862 void
1863 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1864 {
1865 int ret;
1866
1867 if (tr->stop_count)
1868 return;
1869
1870 WARN_ON_ONCE(!irqs_disabled());
1871 if (!tr->allocated_snapshot) {
1872 /* Only the nop tracer should hit this when disabling */
1873 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1874 return;
1875 }
1876
1877 arch_spin_lock(&tr->max_lock);
1878
1879 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1880
1881 if (ret == -EBUSY) {
1882 /*
1883 * We failed to swap the buffer due to a commit taking
1884 * place on this CPU. We fail to record, but we reset
1885 * the max trace buffer (no one writes directly to it)
1886 * and flag that it failed.
1887 * Another possible reason is that a resize is in progress.
1888 */
1889 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1890 "Failed to swap buffers due to commit or resize in progress\n");
1891 }
1892
1893 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1894
1895 __update_max_tr(tr, tsk, cpu);
1896 arch_spin_unlock(&tr->max_lock);
1897
1898 /* Any waiters on the old snapshot buffer need to wake up */
1899 ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1900 }
1901 #endif /* CONFIG_TRACER_MAX_TRACE */
1902
1903 static int wait_on_pipe(struct trace_iterator *iter, int full)
1904 {
1905 int ret;
1906
1907 /* Iterators are static, they should be filled or empty */
1908 if (trace_buffer_iter(iter, iter->cpu_file))
1909 return 0;
1910
1911 ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1912
1913 #ifdef CONFIG_TRACER_MAX_TRACE
1914 /*
1915 * Make sure this is still the snapshot buffer, as if a snapshot were
1916 * to happen, this would now be the main buffer.
1917 */
1918 if (iter->snapshot)
1919 iter->array_buffer = &iter->tr->max_buffer;
1920 #endif
1921 return ret;
1922 }
1923
1924 #ifdef CONFIG_FTRACE_STARTUP_TEST
1925 static bool selftests_can_run;
1926
1927 struct trace_selftests {
1928 struct list_head list;
1929 struct tracer *type;
1930 };
1931
1932 static LIST_HEAD(postponed_selftests);
1933
1934 static int save_selftest(struct tracer *type)
1935 {
1936 struct trace_selftests *selftest;
1937
1938 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1939 if (!selftest)
1940 return -ENOMEM;
1941
1942 selftest->type = type;
1943 list_add(&selftest->list, &postponed_selftests);
1944 return 0;
1945 }
1946
1947 static int run_tracer_selftest(struct tracer *type)
1948 {
1949 struct trace_array *tr = &global_trace;
1950 struct tracer *saved_tracer = tr->current_trace;
1951 int ret;
1952
1953 if (!type->selftest || tracing_selftest_disabled)
1954 return 0;
1955
1956 /*
1957 * If a tracer registers early in boot up (before scheduling is
1958 * initialized and such), then do not run its selftests yet.
1959 * Instead, run it a little later in the boot process.
1960 */
1961 if (!selftests_can_run)
1962 return save_selftest(type);
1963
1964 /*
1965 * Run a selftest on this tracer.
1966 * Here we reset the trace buffer, and set the current
1967 * tracer to be this tracer. The tracer can then run some
1968 * internal tracing to verify that everything is in order.
1969 * If we fail, we do not register this tracer.
1970 */
1971 tracing_reset_online_cpus(&tr->array_buffer);
1972
1973 tr->current_trace = type;
1974
1975 #ifdef CONFIG_TRACER_MAX_TRACE
1976 if (type->use_max_tr) {
1977 /* If we expanded the buffers, make sure the max is expanded too */
1978 if (ring_buffer_expanded)
1979 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1980 RING_BUFFER_ALL_CPUS);
1981 tr->allocated_snapshot = true;
1982 }
1983 #endif
1984
1985 /* the test is responsible for initializing and enabling */
1986 pr_info("Testing tracer %s: ", type->name);
1987 ret = type->selftest(type, tr);
1988 /* the test is responsible for resetting too */
1989 tr->current_trace = saved_tracer;
1990 if (ret) {
1991 printk(KERN_CONT "FAILED!\n");
1992 /* Add the warning after printing 'FAILED' */
1993 WARN_ON(1);
1994 return -1;
1995 }
1996 /* Only reset on passing, to avoid touching corrupted buffers */
1997 tracing_reset_online_cpus(&tr->array_buffer);
1998
1999 #ifdef CONFIG_TRACER_MAX_TRACE
2000 if (type->use_max_tr) {
2001 tr->allocated_snapshot = false;
2002
2003 /* Shrink the max buffer again */
2004 if (ring_buffer_expanded)
2005 ring_buffer_resize(tr->max_buffer.buffer, 1,
2006 RING_BUFFER_ALL_CPUS);
2007 }
2008 #endif
2009
2010 printk(KERN_CONT "PASSED\n");
2011 return 0;
2012 }
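
/*
 * Hedged sketch (not from this file) of what run_tracer_selftest() expects
 * from a tracer: a ->selftest() callback that starts the tracer on @tr,
 * generates a few events, checks that they were recorded and returns 0 on
 * success. Roughly (my_selftest is hypothetical):
 *
 *   static int my_selftest(struct tracer *trace, struct trace_array *tr)
 *   {
 *           int ret = trace->init(tr);
 *           if (ret)
 *                   return ret;
 *           // ... exercise the tracer, count entries in tr ...
 *           trace->reset(tr);
 *           return 0;
 *   }
 *
 * and in the tracer definition:  .selftest = my_selftest,
 */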
2013
2014 static __init int init_trace_selftests(void)
2015 {
2016 struct trace_selftests *p, *n;
2017 struct tracer *t, **last;
2018 int ret;
2019
2020 selftests_can_run = true;
2021
2022 mutex_lock(&trace_types_lock);
2023
2024 if (list_empty(&postponed_selftests))
2025 goto out;
2026
2027 pr_info("Running postponed tracer tests:\n");
2028
2029 tracing_selftest_running = true;
2030 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2031 /* This loop can take minutes when sanitizers are enabled, so
2032 * let's make sure we allow RCU processing.
2033 */
2034 cond_resched();
2035 ret = run_tracer_selftest(p->type);
2036 /* If the test fails, then warn and remove from available_tracers */
2037 if (ret < 0) {
2038 WARN(1, "tracer: %s failed selftest, disabling\n",
2039 p->type->name);
2040 last = &trace_types;
2041 for (t = trace_types; t; t = t->next) {
2042 if (t == p->type) {
2043 *last = t->next;
2044 break;
2045 }
2046 last = &t->next;
2047 }
2048 }
2049 list_del(&p->list);
2050 kfree(p);
2051 }
2052 tracing_selftest_running = false;
2053
2054 out:
2055 mutex_unlock(&trace_types_lock);
2056
2057 return 0;
2058 }
2059 core_initcall(init_trace_selftests);
2060 #else
2061 static inline int run_tracer_selftest(struct tracer *type)
2062 {
2063 return 0;
2064 }
2065 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2066
2067 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2068
2069 static void __init apply_trace_boot_options(void);
2070
2071 /**
2072 * register_tracer - register a tracer with the ftrace system.
2073 * @type: the plugin for the tracer
2074 *
2075 * Register a new plugin tracer.
2076 */
2077 int __init register_tracer(struct tracer *type)
2078 {
2079 struct tracer *t;
2080 int ret = 0;
2081
2082 if (!type->name) {
2083 pr_info("Tracer must have a name\n");
2084 return -1;
2085 }
2086
2087 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2088 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2089 return -1;
2090 }
2091
2092 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2093 pr_warn("Can not register tracer %s due to lockdown\n",
2094 type->name);
2095 return -EPERM;
2096 }
2097
2098 mutex_lock(&trace_types_lock);
2099
2100 tracing_selftest_running = true;
2101
2102 for (t = trace_types; t; t = t->next) {
2103 if (strcmp(type->name, t->name) == 0) {
2104 /* already found */
2105 pr_info("Tracer %s already registered\n",
2106 type->name);
2107 ret = -1;
2108 goto out;
2109 }
2110 }
2111
2112 if (!type->set_flag)
2113 type->set_flag = &dummy_set_flag;
2114 if (!type->flags) {
2115 /* allocate a dummy tracer_flags */
2116 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2117 if (!type->flags) {
2118 ret = -ENOMEM;
2119 goto out;
2120 }
2121 type->flags->val = 0;
2122 type->flags->opts = dummy_tracer_opt;
2123 } else
2124 if (!type->flags->opts)
2125 type->flags->opts = dummy_tracer_opt;
2126
2127 /* store the tracer for __set_tracer_option */
2128 type->flags->trace = type;
2129
2130 ret = run_tracer_selftest(type);
2131 if (ret < 0)
2132 goto out;
2133
2134 type->next = trace_types;
2135 trace_types = type;
2136 add_tracer_options(&global_trace, type);
2137
2138 out:
2139 tracing_selftest_running = false;
2140 mutex_unlock(&trace_types_lock);
2141
2142 if (ret || !default_bootup_tracer)
2143 goto out_unlock;
2144
2145 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2146 goto out_unlock;
2147
2148 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2149 /* Do we want this tracer to start on bootup? */
2150 tracing_set_tracer(&global_trace, type->name);
2151 default_bootup_tracer = NULL;
2152
2153 apply_trace_boot_options();
2154
2155 /* disable other selftests, since this will break it. */
2156 disable_tracing_selftest("running a tracer");
2157
2158 out_unlock:
2159 return ret;
2160 }
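
/*
 * Hedged registration sketch (my_tracer and its callbacks are hypothetical):
 *
 *   static struct tracer my_tracer __read_mostly = {
 *           .name  = "my_tracer",
 *           .init  = my_tracer_init,
 *           .reset = my_tracer_reset,
 *   };
 *
 *   static __init int init_my_tracer(void)
 *   {
 *           return register_tracer(&my_tracer);
 *   }
 *   core_initcall(init_my_tracer);
 *
 * register_tracer() is __init, so built-in tracers register from an initcall;
 * the name must be unique and shorter than MAX_TRACER_SIZE.
 */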
2161
2162 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2163 {
2164 struct trace_buffer *buffer = buf->buffer;
2165
2166 if (!buffer)
2167 return;
2168
2169 ring_buffer_record_disable(buffer);
2170
2171 /* Make sure all commits have finished */
2172 synchronize_rcu();
2173 ring_buffer_reset_cpu(buffer, cpu);
2174
2175 ring_buffer_record_enable(buffer);
2176 }
2177
2178 void tracing_reset_online_cpus(struct array_buffer *buf)
2179 {
2180 struct trace_buffer *buffer = buf->buffer;
2181
2182 if (!buffer)
2183 return;
2184
2185 ring_buffer_record_disable(buffer);
2186
2187 /* Make sure all commits have finished */
2188 synchronize_rcu();
2189
2190 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2191
2192 ring_buffer_reset_online_cpus(buffer);
2193
2194 ring_buffer_record_enable(buffer);
2195 }
2196
2197 /* Must have trace_types_lock held */
2198 void tracing_reset_all_online_cpus_unlocked(void)
2199 {
2200 struct trace_array *tr;
2201
2202 lockdep_assert_held(&trace_types_lock);
2203
2204 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2205 if (!tr->clear_trace)
2206 continue;
2207 tr->clear_trace = false;
2208 tracing_reset_online_cpus(&tr->array_buffer);
2209 #ifdef CONFIG_TRACER_MAX_TRACE
2210 tracing_reset_online_cpus(&tr->max_buffer);
2211 #endif
2212 }
2213 }
2214
2215 void tracing_reset_all_online_cpus(void)
2216 {
2217 mutex_lock(&trace_types_lock);
2218 tracing_reset_all_online_cpus_unlocked();
2219 mutex_unlock(&trace_types_lock);
2220 }
2221
2222 /*
2223 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2224 * is the tgid last observed corresponding to pid=i.
2225 */
2226 static int *tgid_map;
2227
2228 /* The maximum valid index into tgid_map. */
2229 static size_t tgid_map_max;
2230
2231 #define SAVED_CMDLINES_DEFAULT 128
2232 #define NO_CMDLINE_MAP UINT_MAX
2233 /*
2234 * Preemption must be disabled before acquiring trace_cmdline_lock.
2235 * The various trace_arrays' max_lock must be acquired in a context
2236 * where interrupt is disabled.
2237 */
2238 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2239 struct saved_cmdlines_buffer {
2240 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2241 unsigned *map_cmdline_to_pid;
2242 unsigned cmdline_num;
2243 int cmdline_idx;
2244 char saved_cmdlines[];
2245 };
2246 static struct saved_cmdlines_buffer *savedcmd;
2247
2248 static inline char *get_saved_cmdlines(int idx)
2249 {
2250 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2251 }
2252
2253 static inline void set_cmdline(int idx, const char *cmdline)
2254 {
2255 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2256 }
2257
2258 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
2259 {
2260 int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
2261
2262 kfree(s->map_cmdline_to_pid);
2263 kmemleak_free(s);
2264 free_pages((unsigned long)s, order);
2265 }
2266
2267 static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
2268 {
2269 struct saved_cmdlines_buffer *s;
2270 struct page *page;
2271 int orig_size, size;
2272 int order;
2273
2274 /* Figure out how much is needed to hold the given number of cmdlines */
2275 orig_size = sizeof(*s) + val * TASK_COMM_LEN;
2276 order = get_order(orig_size);
2277 size = 1 << (order + PAGE_SHIFT);
2278 page = alloc_pages(GFP_KERNEL, order);
2279 if (!page)
2280 return NULL;
2281
2282 s = page_address(page);
2283 kmemleak_alloc(s, size, 1, GFP_KERNEL);
2284 memset(s, 0, sizeof(*s));
2285
2286 /* Round up to actual allocation */
2287 val = (size - sizeof(*s)) / TASK_COMM_LEN;
2288 s->cmdline_num = val;
2289
2290 s->map_cmdline_to_pid = kmalloc_array(val,
2291 sizeof(*s->map_cmdline_to_pid),
2292 GFP_KERNEL);
2293 if (!s->map_cmdline_to_pid) {
2294 free_saved_cmdlines_buffer(s);
2295 return NULL;
2296 }
2297
2298 s->cmdline_idx = 0;
2299 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2300 sizeof(s->map_pid_to_cmdline));
2301 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2302 val * sizeof(*s->map_cmdline_to_pid));
2303
2304 return s;
2305 }
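
/*
 * Note on the sizing above: the buffer is allocated in whole pages, so the
 * requested @val is only a lower bound. After rounding the allocation up to
 * 2^order pages, cmdline_num is recomputed from the space actually
 * available, so the effective capacity is usually somewhat larger than the
 * caller asked for (e.g. larger than SAVED_CMDLINES_DEFAULT).
 */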
2306
2307 static int trace_create_savedcmd(void)
2308 {
2309 savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
2310
2311 return savedcmd ? 0 : -ENOMEM;
2312 }
2313
2314 int is_tracing_stopped(void)
2315 {
2316 return global_trace.stop_count;
2317 }
2318
2319 static void tracing_start_tr(struct trace_array *tr)
2320 {
2321 struct trace_buffer *buffer;
2322 unsigned long flags;
2323
2324 if (tracing_disabled)
2325 return;
2326
2327 raw_spin_lock_irqsave(&tr->start_lock, flags);
2328 if (--tr->stop_count) {
2329 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2330 /* Someone screwed up their debugging */
2331 tr->stop_count = 0;
2332 }
2333 goto out;
2334 }
2335
2336 /* Prevent the buffers from switching */
2337 arch_spin_lock(&tr->max_lock);
2338
2339 buffer = tr->array_buffer.buffer;
2340 if (buffer)
2341 ring_buffer_record_enable(buffer);
2342
2343 #ifdef CONFIG_TRACER_MAX_TRACE
2344 buffer = tr->max_buffer.buffer;
2345 if (buffer)
2346 ring_buffer_record_enable(buffer);
2347 #endif
2348
2349 arch_spin_unlock(&tr->max_lock);
2350
2351 out:
2352 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2353 }
2354
2355 /**
2356 * tracing_start - quick start of the tracer
2357 *
2358 * If tracing is enabled but was stopped by tracing_stop,
2359 * this will start the tracer back up.
2360 */
2361 void tracing_start(void)
2362
2363 {
2364 return tracing_start_tr(&global_trace);
2365 }
2366
2367 static void tracing_stop_tr(struct trace_array *tr)
2368 {
2369 struct trace_buffer *buffer;
2370 unsigned long flags;
2371
2372 raw_spin_lock_irqsave(&tr->start_lock, flags);
2373 if (tr->stop_count++)
2374 goto out;
2375
2376 /* Prevent the buffers from switching */
2377 arch_spin_lock(&tr->max_lock);
2378
2379 buffer = tr->array_buffer.buffer;
2380 if (buffer)
2381 ring_buffer_record_disable(buffer);
2382
2383 #ifdef CONFIG_TRACER_MAX_TRACE
2384 buffer = tr->max_buffer.buffer;
2385 if (buffer)
2386 ring_buffer_record_disable(buffer);
2387 #endif
2388
2389 arch_spin_unlock(&tr->max_lock);
2390
2391 out:
2392 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2393 }
2394
2395 /**
2396 * tracing_stop - quick stop of the tracer
2397 *
2398 * Light weight way to stop tracing. Use in conjunction with
2399 * tracing_start.
2400 */
2401 void tracing_stop(void)
2402 {
2403 return tracing_stop_tr(&global_trace);
2404 }
2405
2406 static int trace_save_cmdline(struct task_struct *tsk)
2407 {
2408 unsigned tpid, idx;
2409
2410 /* treat recording of idle task as a success */
2411 if (!tsk->pid)
2412 return 1;
2413
2414 tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2415
2416 /*
2417 * It's not the end of the world if we don't get
2418 * the lock, but we also don't want to spin
2419 * nor do we want to disable interrupts,
2420 * so if we miss here, then better luck next time.
2421 *
2422 * This is called from within the scheduler and the wakeup path, so
2423 * interrupts should already be disabled and the run queue lock held.
2424 */
2425 lockdep_assert_preemption_disabled();
2426 if (!arch_spin_trylock(&trace_cmdline_lock))
2427 return 0;
2428
2429 idx = savedcmd->map_pid_to_cmdline[tpid];
2430 if (idx == NO_CMDLINE_MAP) {
2431 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2432
2433 savedcmd->map_pid_to_cmdline[tpid] = idx;
2434 savedcmd->cmdline_idx = idx;
2435 }
2436
2437 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2438 set_cmdline(idx, tsk->comm);
2439
2440 arch_spin_unlock(&trace_cmdline_lock);
2441
2442 return 1;
2443 }
2444
2445 static void __trace_find_cmdline(int pid, char comm[])
2446 {
2447 unsigned map;
2448 int tpid;
2449
2450 if (!pid) {
2451 strcpy(comm, "<idle>");
2452 return;
2453 }
2454
2455 if (WARN_ON_ONCE(pid < 0)) {
2456 strcpy(comm, "<XXX>");
2457 return;
2458 }
2459
2460 tpid = pid & (PID_MAX_DEFAULT - 1);
2461 map = savedcmd->map_pid_to_cmdline[tpid];
2462 if (map != NO_CMDLINE_MAP) {
2463 tpid = savedcmd->map_cmdline_to_pid[map];
2464 if (tpid == pid) {
2465 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2466 return;
2467 }
2468 }
2469 strcpy(comm, "<...>");
2470 }
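
/*
 * Note on the lookup above: pids are hashed into the map by masking with
 * PID_MAX_DEFAULT - 1, so two pids that differ only in the upper bits share
 * a slot. The map_cmdline_to_pid[] back-pointer detects that a slot was
 * last written by a different pid; in that case (or if the slot was never
 * filled) the caller gets the generic "<...>" placeholder instead.
 */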
2471
2472 void trace_find_cmdline(int pid, char comm[])
2473 {
2474 preempt_disable();
2475 arch_spin_lock(&trace_cmdline_lock);
2476
2477 __trace_find_cmdline(pid, comm);
2478
2479 arch_spin_unlock(&trace_cmdline_lock);
2480 preempt_enable();
2481 }
2482
2483 static int *trace_find_tgid_ptr(int pid)
2484 {
2485 /*
2486 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2487 * if we observe a non-NULL tgid_map then we also observe the correct
2488 * tgid_map_max.
2489 */
2490 int *map = smp_load_acquire(&tgid_map);
2491
2492 if (unlikely(!map || pid > tgid_map_max))
2493 return NULL;
2494
2495 return &map[pid];
2496 }
2497
2498 int trace_find_tgid(int pid)
2499 {
2500 int *ptr = trace_find_tgid_ptr(pid);
2501
2502 return ptr ? *ptr : 0;
2503 }
2504
2505 static int trace_save_tgid(struct task_struct *tsk)
2506 {
2507 int *ptr;
2508
2509 /* treat recording of idle task as a success */
2510 if (!tsk->pid)
2511 return 1;
2512
2513 ptr = trace_find_tgid_ptr(tsk->pid);
2514 if (!ptr)
2515 return 0;
2516
2517 *ptr = tsk->tgid;
2518 return 1;
2519 }
2520
2521 static bool tracing_record_taskinfo_skip(int flags)
2522 {
2523 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2524 return true;
2525 if (!__this_cpu_read(trace_taskinfo_save))
2526 return true;
2527 return false;
2528 }
2529
2530 /**
2531 * tracing_record_taskinfo - record the task info of a task
2532 *
2533 * @task: task to record
2534 * @flags: TRACE_RECORD_CMDLINE for recording comm
2535 * TRACE_RECORD_TGID for recording tgid
2536 */
2537 void tracing_record_taskinfo(struct task_struct *task, int flags)
2538 {
2539 bool done;
2540
2541 if (tracing_record_taskinfo_skip(flags))
2542 return;
2543
2544 /*
2545 * Record as much task information as possible. If some fail, continue
2546 * to try to record the others.
2547 */
2548 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2549 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2550
2551 /* If recording any information failed, retry again soon. */
2552 if (!done)
2553 return;
2554
2555 __this_cpu_write(trace_taskinfo_save, false);
2556 }
2557
2558 /**
2559 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2560 *
2561 * @prev: previous task during sched_switch
2562 * @next: next task during sched_switch
2563 * @flags: TRACE_RECORD_CMDLINE for recording comm
2564 * TRACE_RECORD_TGID for recording tgid
2565 */
2566 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2567 struct task_struct *next, int flags)
2568 {
2569 bool done;
2570
2571 if (tracing_record_taskinfo_skip(flags))
2572 return;
2573
2574 /*
2575 * Record as much task information as possible. If some fail, continue
2576 * to try to record the others.
2577 */
2578 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2579 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2580 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2581 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2582
2583 /* If recording any information failed, retry again soon. */
2584 if (!done)
2585 return;
2586
2587 __this_cpu_write(trace_taskinfo_save, false);
2588 }
2589
2590 /* Helpers to record a specific task information */
2591 void tracing_record_cmdline(struct task_struct *task)
2592 {
2593 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2594 }
2595
2596 void tracing_record_tgid(struct task_struct *task)
2597 {
2598 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2599 }
2600
2601 /*
2602 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2603 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2604 * simplifies those functions and keeps them in sync.
2605 */
2606 enum print_line_t trace_handle_return(struct trace_seq *s)
2607 {
2608 return trace_seq_has_overflowed(s) ?
2609 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2610 }
2611 EXPORT_SYMBOL_GPL(trace_handle_return);
2612
2613 void
2614 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2615 unsigned long flags, int pc)
2616 {
2617 struct task_struct *tsk = current;
2618
2619 entry->preempt_count = pc & 0xff;
2620 entry->pid = (tsk) ? tsk->pid : 0;
2621 entry->type = type;
2622 entry->flags =
2623 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2624 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2625 #else
2626 TRACE_FLAG_IRQS_NOSUPPORT |
2627 #endif
2628 ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
2629 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2630 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2631 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2632 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2633 }
2634 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2635
2636 struct ring_buffer_event *
2637 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2638 int type,
2639 unsigned long len,
2640 unsigned long flags, int pc)
2641 {
2642 return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2643 }
2644
2645 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2646 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2647 static int trace_buffered_event_ref;
2648
2649 /**
2650 * trace_buffered_event_enable - enable buffering events
2651 *
2652 * When events are being filtered, it is quicker to use a temporary
2653 * buffer to write the event data into if there's a likely chance
2654 * that it will not be committed. The discard of the ring buffer
2655 * is not as fast as committing, and is much slower than copying
2656 * a commit.
2657 *
2658 * When an event is to be filtered, allocate per cpu buffers to
2659 * write the event data into, and if the event is filtered and discarded
2660 * it is simply dropped, otherwise, the entire data is to be committed
2661 * in one shot.
2662 */
2663 void trace_buffered_event_enable(void)
2664 {
2665 struct ring_buffer_event *event;
2666 struct page *page;
2667 int cpu;
2668
2669 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2670
2671 if (trace_buffered_event_ref++)
2672 return;
2673
2674 for_each_tracing_cpu(cpu) {
2675 page = alloc_pages_node(cpu_to_node(cpu),
2676 GFP_KERNEL | __GFP_NORETRY, 0);
2677 /* This is just an optimization and can handle failures */
2678 if (!page) {
2679 pr_err("Failed to allocate event buffer\n");
2680 break;
2681 }
2682
2683 event = page_address(page);
2684 memset(event, 0, sizeof(*event));
2685
2686 per_cpu(trace_buffered_event, cpu) = event;
2687
2688 preempt_disable();
2689 if (cpu == smp_processor_id() &&
2690 __this_cpu_read(trace_buffered_event) !=
2691 per_cpu(trace_buffered_event, cpu))
2692 WARN_ON_ONCE(1);
2693 preempt_enable();
2694 }
2695 }
2696
2697 static void enable_trace_buffered_event(void *data)
2698 {
2699 /* Probably not needed, but do it anyway */
2700 smp_rmb();
2701 this_cpu_dec(trace_buffered_event_cnt);
2702 }
2703
2704 static void disable_trace_buffered_event(void *data)
2705 {
2706 this_cpu_inc(trace_buffered_event_cnt);
2707 }
2708
2709 /**
2710 * trace_buffered_event_disable - disable buffering events
2711 *
2712 * When a filter is removed, it is faster to not use the buffered
2713 * events, and to commit directly into the ring buffer. Free up
2714 * the temp buffers when there are no more users. This requires
2715 * special synchronization with current events.
2716 */
2717 void trace_buffered_event_disable(void)
2718 {
2719 int cpu;
2720
2721 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2722
2723 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2724 return;
2725
2726 if (--trace_buffered_event_ref)
2727 return;
2728
2729 /* For each CPU, set the buffer as used. */
2730 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2731 NULL, true);
2732
2733 /* Wait for all current users to finish */
2734 synchronize_rcu();
2735
2736 for_each_tracing_cpu(cpu) {
2737 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2738 per_cpu(trace_buffered_event, cpu) = NULL;
2739 }
2740
2741 /*
2742 * Wait for all CPUs that potentially started checking if they can use
2743 * their event buffer only after the previous synchronize_rcu() call and
2744 * they still read a valid pointer from trace_buffered_event. It must be
2745 * ensured they don't see cleared trace_buffered_event_cnt else they
2746 * could wrongly decide to use the pointed-to buffer which is now freed.
2747 */
2748 synchronize_rcu();
2749
2750 /* For each CPU, relinquish the buffer */
2751 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2752 true);
2753 }
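
/*
 * Ordering summary for the teardown above: (1) bump trace_buffered_event_cnt
 * on every CPU so new events fall back to the ring buffer, (2) wait for
 * current users with synchronize_rcu(), (3) free and clear the per-cpu
 * pages, (4) wait again so no CPU can still be acting on a stale pointer,
 * and only then (5) drop the counts back down. Skipping either
 * synchronize_rcu() could let a CPU write into a freed page.
 */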
2754
2755 static struct trace_buffer *temp_buffer;
2756
2757 struct ring_buffer_event *
2758 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2759 struct trace_event_file *trace_file,
2760 int type, unsigned long len,
2761 unsigned long flags, int pc)
2762 {
2763 struct ring_buffer_event *entry;
2764 int val;
2765
2766 *current_rb = trace_file->tr->array_buffer.buffer;
2767
2768 if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2769 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2770 (entry = this_cpu_read(trace_buffered_event))) {
2771 /* Try to use the per cpu buffer first */
2772 val = this_cpu_inc_return(trace_buffered_event_cnt);
2773 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2774 trace_event_setup(entry, type, flags, pc);
2775 entry->array[0] = len;
2776 return entry;
2777 }
2778 this_cpu_dec(trace_buffered_event_cnt);
2779 }
2780
2781 entry = __trace_buffer_lock_reserve(*current_rb,
2782 type, len, flags, pc);
2783 /*
2784 * If tracing is off, but we have triggers enabled
2785 * we still need to look at the event data. Use the temp_buffer
2786 * to store the trace event for the trigger to use. It's recursion
2787 * safe and will not be recorded anywhere.
2788 */
2789 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2790 *current_rb = temp_buffer;
2791 entry = __trace_buffer_lock_reserve(*current_rb,
2792 type, len, flags, pc);
2793 }
2794 return entry;
2795 }
2796 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
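
/*
 * Descriptive sketch of the reserve path above: when the event file is
 * filtered or soft-disabled, the small per-cpu page (trace_buffered_event)
 * is tried first so that a discarded event costs only a memcpy into that
 * page. Only if the per-cpu slot cannot be used (nested use, oversized
 * event, or absolute timestamps) is the event reserved in the real ring
 * buffer, and as a last resort in temp_buffer so triggers can still
 * inspect it.
 */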
2797
2798 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2799 static DEFINE_MUTEX(tracepoint_printk_mutex);
2800
2801 static void output_printk(struct trace_event_buffer *fbuffer)
2802 {
2803 struct trace_event_call *event_call;
2804 struct trace_event_file *file;
2805 struct trace_event *event;
2806 unsigned long flags;
2807 struct trace_iterator *iter = tracepoint_print_iter;
2808
2809 /* We should never get here if iter is NULL */
2810 if (WARN_ON_ONCE(!iter))
2811 return;
2812
2813 event_call = fbuffer->trace_file->event_call;
2814 if (!event_call || !event_call->event.funcs ||
2815 !event_call->event.funcs->trace)
2816 return;
2817
2818 file = fbuffer->trace_file;
2819 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2820 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2821 !filter_match_preds(file->filter, fbuffer->entry)))
2822 return;
2823
2824 event = &fbuffer->trace_file->event_call->event;
2825
2826 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2827 trace_seq_init(&iter->seq);
2828 iter->ent = fbuffer->entry;
2829 event_call->event.funcs->trace(iter, 0, event);
2830 trace_seq_putc(&iter->seq, 0);
2831 printk("%s", iter->seq.buffer);
2832
2833 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2834 }
2835
2836 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2837 void *buffer, size_t *lenp,
2838 loff_t *ppos)
2839 {
2840 int save_tracepoint_printk;
2841 int ret;
2842
2843 mutex_lock(&tracepoint_printk_mutex);
2844 save_tracepoint_printk = tracepoint_printk;
2845
2846 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2847
2848 /*
2849 * This will force exiting early, as tracepoint_printk
2850 * is always zero when tracepoint_printk_iter is not allocated
2851 */
2852 if (!tracepoint_print_iter)
2853 tracepoint_printk = 0;
2854
2855 if (save_tracepoint_printk == tracepoint_printk)
2856 goto out;
2857
2858 if (tracepoint_printk)
2859 static_key_enable(&tracepoint_printk_key.key);
2860 else
2861 static_key_disable(&tracepoint_printk_key.key);
2862
2863 out:
2864 mutex_unlock(&tracepoint_printk_mutex);
2865
2866 return ret;
2867 }
2868
2869 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2870 {
2871 if (static_key_false(&tracepoint_printk_key.key))
2872 output_printk(fbuffer);
2873
2874 if (static_branch_unlikely(&trace_event_exports_enabled))
2875 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2876 event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2877 fbuffer->event, fbuffer->entry,
2878 fbuffer->flags, fbuffer->pc, fbuffer->regs);
2879 }
2880 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2881
2882 /*
2883 * Skip 3:
2884 *
2885 * trace_buffer_unlock_commit_regs()
2886 * trace_event_buffer_commit()
2887 * trace_event_raw_event_xxx()
2888 */
2889 # define STACK_SKIP 3
2890
2891 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2892 struct trace_buffer *buffer,
2893 struct ring_buffer_event *event,
2894 unsigned long flags, int pc,
2895 struct pt_regs *regs)
2896 {
2897 __buffer_unlock_commit(buffer, event);
2898
2899 /*
2900 * If regs is not set, then skip the necessary functions.
2901 * Note, we can still get here via blktrace, wakeup tracer
2902 * and mmiotrace, but that's ok if they lose a function or
2903 * two. They are not that meaningful.
2904 */
2905 ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2906 ftrace_trace_userstack(tr, buffer, flags, pc);
2907 }
2908
2909 /*
2910 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2911 */
2912 void
2913 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2914 struct ring_buffer_event *event)
2915 {
2916 __buffer_unlock_commit(buffer, event);
2917 }
2918
2919 void
2920 trace_function(struct trace_array *tr,
2921 unsigned long ip, unsigned long parent_ip, unsigned long flags,
2922 int pc)
2923 {
2924 struct trace_event_call *call = &event_function;
2925 struct trace_buffer *buffer = tr->array_buffer.buffer;
2926 struct ring_buffer_event *event;
2927 struct ftrace_entry *entry;
2928
2929 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2930 flags, pc);
2931 if (!event)
2932 return;
2933 entry = ring_buffer_event_data(event);
2934 entry->ip = ip;
2935 entry->parent_ip = parent_ip;
2936
2937 if (!call_filter_check_discard(call, entry, buffer, event)) {
2938 if (static_branch_unlikely(&trace_function_exports_enabled))
2939 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2940 __buffer_unlock_commit(buffer, event);
2941 }
2942 }
2943
2944 #ifdef CONFIG_STACKTRACE
2945
2946 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2947 #define FTRACE_KSTACK_NESTING 4
2948
2949 #define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2950
2951 struct ftrace_stack {
2952 unsigned long calls[FTRACE_KSTACK_ENTRIES];
2953 };
2954
2955
2956 struct ftrace_stacks {
2957 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
2958 };
2959
2960 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2961 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2962
2963 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2964 unsigned long flags,
2965 int skip, int pc, struct pt_regs *regs)
2966 {
2967 struct trace_event_call *call = &event_kernel_stack;
2968 struct ring_buffer_event *event;
2969 unsigned int size, nr_entries;
2970 struct ftrace_stack *fstack;
2971 struct stack_entry *entry;
2972 int stackidx;
2973
2974 /*
2975 * Add one, for this function and the call to save_stack_trace()
2976 * If regs is set, then these functions will not be in the way.
2977 */
2978 #ifndef CONFIG_UNWINDER_ORC
2979 if (!regs)
2980 skip++;
2981 #endif
2982
2983 preempt_disable_notrace();
2984
2985 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2986
2987 /* This should never happen. If it does, yell once and skip */
2988 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2989 goto out;
2990
2991 /*
2992 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2993 * interrupt will either see the value pre increment or post
2994 * increment. If the interrupt happens pre increment it will have
2995 * restored the counter when it returns. We just need a barrier to
2996 * keep gcc from moving things around.
2997 */
2998 barrier();
2999
3000 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3001 size = ARRAY_SIZE(fstack->calls);
3002
3003 if (regs) {
3004 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3005 size, skip);
3006 } else {
3007 nr_entries = stack_trace_save(fstack->calls, size, skip);
3008 }
3009
3010 size = nr_entries * sizeof(unsigned long);
3011 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3012 (sizeof(*entry) - sizeof(entry->caller)) + size,
3013 flags, pc);
3014 if (!event)
3015 goto out;
3016 entry = ring_buffer_event_data(event);
3017
3018 memcpy(&entry->caller, fstack->calls, size);
3019 entry->size = nr_entries;
3020
3021 if (!call_filter_check_discard(call, entry, buffer, event))
3022 __buffer_unlock_commit(buffer, event);
3023
3024 out:
3025 /* Again, don't let gcc optimize things here */
3026 barrier();
3027 __this_cpu_dec(ftrace_stack_reserve);
3028 preempt_enable_notrace();
3029
3030 }
3031
3032 static inline void ftrace_trace_stack(struct trace_array *tr,
3033 struct trace_buffer *buffer,
3034 unsigned long flags,
3035 int skip, int pc, struct pt_regs *regs)
3036 {
3037 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3038 return;
3039
3040 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
3041 }
3042
3043 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
3044 int pc)
3045 {
3046 struct trace_buffer *buffer = tr->array_buffer.buffer;
3047
3048 if (rcu_is_watching()) {
3049 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3050 return;
3051 }
3052
3053 /*
3054 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3055 * but if the above rcu_is_watching() failed, then the NMI
3056 * triggered someplace critical, and rcu_irq_enter() should
3057 * not be called from NMI.
3058 */
3059 if (unlikely(in_nmi()))
3060 return;
3061
3062 rcu_irq_enter_irqson();
3063 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3064 rcu_irq_exit_irqson();
3065 }
3066
3067 /**
3068 * trace_dump_stack - record a stack back trace in the trace buffer
3069 * @skip: Number of functions to skip (helper handlers)
3070 */
3071 void trace_dump_stack(int skip)
3072 {
3073 unsigned long flags;
3074
3075 if (tracing_disabled || tracing_selftest_running)
3076 return;
3077
3078 local_save_flags(flags);
3079
3080 #ifndef CONFIG_UNWINDER_ORC
3081 /* Skip 1 to skip this function. */
3082 skip++;
3083 #endif
3084 __ftrace_trace_stack(global_trace.array_buffer.buffer,
3085 flags, skip, preempt_count(), NULL);
3086 }
3087 EXPORT_SYMBOL_GPL(trace_dump_stack);
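
/*
 * Hedged usage note: a debugging call site would typically just do
 *
 *   trace_dump_stack(0);
 *
 * to drop a kernel stack trace of the current context into the top-level
 * trace buffer; a non-zero @skip hides that many of the caller's own
 * helper frames from the recorded trace.
 */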
3088
3089 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3090 static DEFINE_PER_CPU(int, user_stack_count);
3091
3092 static void
3093 ftrace_trace_userstack(struct trace_array *tr,
3094 struct trace_buffer *buffer, unsigned long flags, int pc)
3095 {
3096 struct trace_event_call *call = &event_user_stack;
3097 struct ring_buffer_event *event;
3098 struct userstack_entry *entry;
3099
3100 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3101 return;
3102
3103 /*
3104 * NMIs can not handle page faults, even with fix ups.
3105 * The save user stack can (and often does) fault.
3106 */
3107 if (unlikely(in_nmi()))
3108 return;
3109
3110 /*
3111 * prevent recursion, since the user stack tracing may
3112 * trigger other kernel events.
3113 */
3114 preempt_disable();
3115 if (__this_cpu_read(user_stack_count))
3116 goto out;
3117
3118 __this_cpu_inc(user_stack_count);
3119
3120 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3121 sizeof(*entry), flags, pc);
3122 if (!event)
3123 goto out_drop_count;
3124 entry = ring_buffer_event_data(event);
3125
3126 entry->tgid = current->tgid;
3127 memset(&entry->caller, 0, sizeof(entry->caller));
3128
3129 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3130 if (!call_filter_check_discard(call, entry, buffer, event))
3131 __buffer_unlock_commit(buffer, event);
3132
3133 out_drop_count:
3134 __this_cpu_dec(user_stack_count);
3135 out:
3136 preempt_enable();
3137 }
3138 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3139 static void ftrace_trace_userstack(struct trace_array *tr,
3140 struct trace_buffer *buffer,
3141 unsigned long flags, int pc)
3142 {
3143 }
3144 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3145
3146 #endif /* CONFIG_STACKTRACE */
3147
3148 /* created for use with alloc_percpu */
3149 struct trace_buffer_struct {
3150 int nesting;
3151 char buffer[4][TRACE_BUF_SIZE];
3152 };
3153
3154 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3155
3156 /*
3157 * This allows for lockless recording. If we're nested too deeply, then
3158 * this returns NULL.
3159 */
3160 static char *get_trace_buf(void)
3161 {
3162 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3163
3164 if (!trace_percpu_buffer || buffer->nesting >= 4)
3165 return NULL;
3166
3167 buffer->nesting++;
3168
3169 /* Interrupts must see nesting incremented before we use the buffer */
3170 barrier();
3171 return &buffer->buffer[buffer->nesting - 1][0];
3172 }
3173
3174 static void put_trace_buf(void)
3175 {
3176 /* Don't let the decrement of nesting leak before this */
3177 barrier();
3178 this_cpu_dec(trace_percpu_buffer->nesting);
3179 }
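
/*
 * Pairing sketch (this is how trace_vbprintk()/__trace_array_vprintk()
 * below use it): a successful get_trace_buf() must be matched by
 * put_trace_buf() on the same CPU, with preemption disabled across both.
 *
 *   preempt_disable_notrace();
 *   buf = get_trace_buf();
 *   if (buf) {
 *           // ... format up to TRACE_BUF_SIZE bytes into buf ...
 *           put_trace_buf();
 *   }
 *   preempt_enable_notrace();
 */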
3180
3181 static int alloc_percpu_trace_buffer(void)
3182 {
3183 struct trace_buffer_struct __percpu *buffers;
3184
3185 if (trace_percpu_buffer)
3186 return 0;
3187
3188 buffers = alloc_percpu(struct trace_buffer_struct);
3189 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3190 return -ENOMEM;
3191
3192 trace_percpu_buffer = buffers;
3193 return 0;
3194 }
3195
3196 static int buffers_allocated;
3197
3198 void trace_printk_init_buffers(void)
3199 {
3200 if (buffers_allocated)
3201 return;
3202
3203 if (alloc_percpu_trace_buffer())
3204 return;
3205
3206 /* trace_printk() is for debug use only. Don't use it in production. */
3207
3208 pr_warn("\n");
3209 pr_warn("**********************************************************\n");
3210 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3211 pr_warn("** **\n");
3212 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3213 pr_warn("** **\n");
3214 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3215 pr_warn("** unsafe for production use. **\n");
3216 pr_warn("** **\n");
3217 pr_warn("** If you see this message and you are not debugging **\n");
3218 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3219 pr_warn("** **\n");
3220 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3221 pr_warn("**********************************************************\n");
3222
3223 /* Expand the buffers to set size */
3224 tracing_update_buffers();
3225
3226 buffers_allocated = 1;
3227
3228 /*
3229 * trace_printk_init_buffers() can be called by modules.
3230 * If that happens, then we need to start cmdline recording
3231 * directly here. If the global_trace.buffer is already
3232 * allocated here, then this was called by module code.
3233 */
3234 if (global_trace.array_buffer.buffer)
3235 tracing_start_cmdline_record();
3236 }
3237 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3238
3239 void trace_printk_start_comm(void)
3240 {
3241 /* Start tracing comms if trace printk is set */
3242 if (!buffers_allocated)
3243 return;
3244 tracing_start_cmdline_record();
3245 }
3246
3247 static void trace_printk_start_stop_comm(int enabled)
3248 {
3249 if (!buffers_allocated)
3250 return;
3251
3252 if (enabled)
3253 tracing_start_cmdline_record();
3254 else
3255 tracing_stop_cmdline_record();
3256 }
3257
3258 /**
3259 * trace_vbprintk - write binary msg to tracing buffer
3260 * @ip: The address of the caller
3261 * @fmt: The string format to write to the buffer
3262 * @args: Arguments for @fmt
3263 */
3264 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3265 {
3266 struct trace_event_call *call = &event_bprint;
3267 struct ring_buffer_event *event;
3268 struct trace_buffer *buffer;
3269 struct trace_array *tr = &global_trace;
3270 struct bprint_entry *entry;
3271 unsigned long flags;
3272 char *tbuffer;
3273 int len = 0, size, pc;
3274
3275 if (unlikely(tracing_selftest_running || tracing_disabled))
3276 return 0;
3277
3278 /* Don't pollute graph traces with trace_vprintk internals */
3279 pause_graph_tracing();
3280
3281 pc = preempt_count();
3282 preempt_disable_notrace();
3283
3284 tbuffer = get_trace_buf();
3285 if (!tbuffer) {
3286 len = 0;
3287 goto out_nobuffer;
3288 }
3289
3290 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3291
3292 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3293 goto out_put;
3294
3295 local_save_flags(flags);
3296 size = sizeof(*entry) + sizeof(u32) * len;
3297 buffer = tr->array_buffer.buffer;
3298 ring_buffer_nest_start(buffer);
3299 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3300 flags, pc);
3301 if (!event)
3302 goto out;
3303 entry = ring_buffer_event_data(event);
3304 entry->ip = ip;
3305 entry->fmt = fmt;
3306
3307 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3308 if (!call_filter_check_discard(call, entry, buffer, event)) {
3309 __buffer_unlock_commit(buffer, event);
3310 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3311 }
3312
3313 out:
3314 ring_buffer_nest_end(buffer);
3315 out_put:
3316 put_trace_buf();
3317
3318 out_nobuffer:
3319 preempt_enable_notrace();
3320 unpause_graph_tracing();
3321
3322 return len;
3323 }
3324 EXPORT_SYMBOL_GPL(trace_vbprintk);
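
/*
 * Note on the "b" in vbprintk: the event stores only the format pointer and
 * the raw arguments packed by vbin_printf(); no string is built here. The
 * expensive formatting happens later, at read time, when the bprint output
 * code runs the stored arguments back through the format, which keeps this
 * tracing fast path cheap.
 */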
3325
3326 __printf(3, 0)
3327 static int
3328 __trace_array_vprintk(struct trace_buffer *buffer,
3329 unsigned long ip, const char *fmt, va_list args)
3330 {
3331 struct trace_event_call *call = &event_print;
3332 struct ring_buffer_event *event;
3333 int len = 0, size, pc;
3334 struct print_entry *entry;
3335 unsigned long flags;
3336 char *tbuffer;
3337
3338 if (tracing_disabled || tracing_selftest_running)
3339 return 0;
3340
3341 /* Don't pollute graph traces with trace_vprintk internals */
3342 pause_graph_tracing();
3343
3344 pc = preempt_count();
3345 preempt_disable_notrace();
3346
3347
3348 tbuffer = get_trace_buf();
3349 if (!tbuffer) {
3350 len = 0;
3351 goto out_nobuffer;
3352 }
3353
3354 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3355
3356 local_save_flags(flags);
3357 size = sizeof(*entry) + len + 1;
3358 ring_buffer_nest_start(buffer);
3359 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3360 flags, pc);
3361 if (!event)
3362 goto out;
3363 entry = ring_buffer_event_data(event);
3364 entry->ip = ip;
3365
3366 memcpy(&entry->buf, tbuffer, len + 1);
3367 if (!call_filter_check_discard(call, entry, buffer, event)) {
3368 __buffer_unlock_commit(buffer, event);
3369 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3370 }
3371
3372 out:
3373 ring_buffer_nest_end(buffer);
3374 put_trace_buf();
3375
3376 out_nobuffer:
3377 preempt_enable_notrace();
3378 unpause_graph_tracing();
3379
3380 return len;
3381 }
3382
3383 __printf(3, 0)
3384 int trace_array_vprintk(struct trace_array *tr,
3385 unsigned long ip, const char *fmt, va_list args)
3386 {
3387 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3388 }
3389
3390 /**
3391 * trace_array_printk - Print a message to a specific instance
3392 * @tr: The instance trace_array descriptor
3393 * @ip: The instruction pointer that this is called from.
3394 * @fmt: The format to print (printf format)
3395 *
3396 * If a subsystem sets up its own instance, they have the right to
3397 * printk strings into their tracing instance buffer using this
3398 * function. Note, this function will not write into the top level
3399 * buffer (use trace_printk() for that), as the top level buffer should
3400 * only contain events that can be individually disabled.
3401 * trace_printk() is only used for debugging a kernel, and should not
3402 * ever be incorporated in normal use.
3403 *
3404 * trace_array_printk() can be used, as it will not add noise to the
3405 * top level tracing buffer.
3406 *
3407 * Note, trace_array_init_printk() must be called on @tr before this
3408 * can be used.
3409 */
3410 __printf(3, 0)
3411 int trace_array_printk(struct trace_array *tr,
3412 unsigned long ip, const char *fmt, ...)
3413 {
3414 int ret;
3415 va_list ap;
3416
3417 if (!tr)
3418 return -ENOENT;
3419
3420 /* This is only allowed for created instances */
3421 if (tr == &global_trace)
3422 return 0;
3423
3424 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3425 return 0;
3426
3427 va_start(ap, fmt);
3428 ret = trace_array_vprintk(tr, ip, fmt, ap);
3429 va_end(ap);
3430 return ret;
3431 }
3432 EXPORT_SYMBOL_GPL(trace_array_printk);
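
/*
 * Hedged usage sketch for an instance owner (the instance name and locals
 * are hypothetical):
 *
 *   struct trace_array *tr = trace_array_get_by_name("my_subsys");
 *
 *   if (tr && !trace_array_init_printk(tr))
 *           trace_array_printk(tr, _THIS_IP_, "state=%d\n", state);
 *
 * Writes land only in that instance's buffer and are gated by the
 * instance's "printk" trace option, so the top-level buffer stays clean.
 */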
3433
3434 /**
3435 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3436 * @tr: The trace array to initialize the buffers for
3437 *
3438 * As trace_array_printk() only writes into instances, they are OK to
3439 * have in the kernel (unlike trace_printk()). This needs to be called
3440 * before trace_array_printk() can be used on a trace_array.
3441 */
3442 int trace_array_init_printk(struct trace_array *tr)
3443 {
3444 if (!tr)
3445 return -ENOENT;
3446
3447 /* This is only allowed for created instances */
3448 if (tr == &global_trace)
3449 return -EINVAL;
3450
3451 return alloc_percpu_trace_buffer();
3452 }
3453 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3454
3455 __printf(3, 4)
3456 int trace_array_printk_buf(struct trace_buffer *buffer,
3457 unsigned long ip, const char *fmt, ...)
3458 {
3459 int ret;
3460 va_list ap;
3461
3462 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3463 return 0;
3464
3465 va_start(ap, fmt);
3466 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3467 va_end(ap);
3468 return ret;
3469 }
3470
3471 __printf(2, 0)
3472 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3473 {
3474 return trace_array_vprintk(&global_trace, ip, fmt, args);
3475 }
3476 EXPORT_SYMBOL_GPL(trace_vprintk);
3477
3478 static void trace_iterator_increment(struct trace_iterator *iter)
3479 {
3480 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3481
3482 iter->idx++;
3483 if (buf_iter)
3484 ring_buffer_iter_advance(buf_iter);
3485 }
3486
3487 static struct trace_entry *
3488 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3489 unsigned long *lost_events)
3490 {
3491 struct ring_buffer_event *event;
3492 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3493
3494 if (buf_iter) {
3495 event = ring_buffer_iter_peek(buf_iter, ts);
3496 if (lost_events)
3497 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3498 (unsigned long)-1 : 0;
3499 } else {
3500 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3501 lost_events);
3502 }
3503
3504 if (event) {
3505 iter->ent_size = ring_buffer_event_length(event);
3506 return ring_buffer_event_data(event);
3507 }
3508 iter->ent_size = 0;
3509 return NULL;
3510 }
3511
3512 static struct trace_entry *
3513 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3514 unsigned long *missing_events, u64 *ent_ts)
3515 {
3516 struct trace_buffer *buffer = iter->array_buffer->buffer;
3517 struct trace_entry *ent, *next = NULL;
3518 unsigned long lost_events = 0, next_lost = 0;
3519 int cpu_file = iter->cpu_file;
3520 u64 next_ts = 0, ts;
3521 int next_cpu = -1;
3522 int next_size = 0;
3523 int cpu;
3524
3525 /*
3526 * If we are in a per_cpu trace file, don't bother iterating over
3527 * all the CPUs; peek at that CPU directly.
3528 */
3529 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3530 if (ring_buffer_empty_cpu(buffer, cpu_file))
3531 return NULL;
3532 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3533 if (ent_cpu)
3534 *ent_cpu = cpu_file;
3535
3536 return ent;
3537 }
3538
3539 for_each_tracing_cpu(cpu) {
3540
3541 if (ring_buffer_empty_cpu(buffer, cpu))
3542 continue;
3543
3544 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3545
3546 /*
3547 * Pick the entry with the smallest timestamp:
3548 */
3549 if (ent && (!next || ts < next_ts)) {
3550 next = ent;
3551 next_cpu = cpu;
3552 next_ts = ts;
3553 next_lost = lost_events;
3554 next_size = iter->ent_size;
3555 }
3556 }
3557
3558 iter->ent_size = next_size;
3559
3560 if (ent_cpu)
3561 *ent_cpu = next_cpu;
3562
3563 if (ent_ts)
3564 *ent_ts = next_ts;
3565
3566 if (missing_events)
3567 *missing_events = next_lost;
3568
3569 return next;
3570 }
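
/*
 * The loop above is effectively a k-way merge: each CPU's buffer is already
 * time ordered, so peeking at the head of every non-empty CPU and picking
 * the smallest timestamp yields the next event in global order without any
 * sorting. Note that iter->ent_size is clobbered by every peek, which is
 * why the winning entry's size is stashed in next_size and restored at the
 * end.
 */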
3571
3572 #define STATIC_TEMP_BUF_SIZE 128
3573 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3574
3575 /* Find the next real entry, without updating the iterator itself */
3576 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3577 int *ent_cpu, u64 *ent_ts)
3578 {
3579 /* __find_next_entry will reset ent_size */
3580 int ent_size = iter->ent_size;
3581 struct trace_entry *entry;
3582
3583 /*
3584 * If called from ftrace_dump(), then the iter->temp buffer
3585 * will be the static_temp_buf and not created from kmalloc.
3586 * If the entry size is greater than the buffer, we can
3587 * not save it. Just return NULL in that case. This is only
3588 * used to add markers when two consecutive events' time
3589 * stamps have a large delta. See trace_print_lat_context()
3590 */
3591 if (iter->temp == static_temp_buf &&
3592 STATIC_TEMP_BUF_SIZE < ent_size)
3593 return NULL;
3594
3595 /*
3596 * The __find_next_entry() may call peek_next_entry(), which may
3597 * call ring_buffer_peek() that may make the contents of iter->ent
3598 * undefined. Need to copy iter->ent now.
3599 */
3600 if (iter->ent && iter->ent != iter->temp) {
3601 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3602 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3603 void *temp;
3604 temp = kmalloc(iter->ent_size, GFP_KERNEL);
3605 if (!temp)
3606 return NULL;
3607 kfree(iter->temp);
3608 iter->temp = temp;
3609 iter->temp_size = iter->ent_size;
3610 }
3611 memcpy(iter->temp, iter->ent, iter->ent_size);
3612 iter->ent = iter->temp;
3613 }
3614 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3615 /* Put back the original ent_size */
3616 iter->ent_size = ent_size;
3617
3618 return entry;
3619 }
3620
3621 /* Find the next real entry, and increment the iterator to the next entry */
3622 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3623 {
3624 iter->ent = __find_next_entry(iter, &iter->cpu,
3625 &iter->lost_events, &iter->ts);
3626
3627 if (iter->ent)
3628 trace_iterator_increment(iter);
3629
3630 return iter->ent ? iter : NULL;
3631 }
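/*
 * A minimal consumption loop built on the helper above; this is a
 * sketch of the pattern the seq_file path below follows (given a
 * struct seq_file *m paired with this iterator), not new code:
 *
 *	while (trace_find_next_entry_inc(iter)) {
 *		if (print_trace_line(iter) == TRACE_TYPE_PARTIAL_LINE)
 *			break;
 *		trace_print_seq(m, &iter->seq);
 *	}
 */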
3632
3633 static void trace_consume(struct trace_iterator *iter)
3634 {
3635 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3636 &iter->lost_events);
3637 }
3638
3639 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3640 {
3641 struct trace_iterator *iter = m->private;
3642 int i = (int)*pos;
3643 void *ent;
3644
3645 WARN_ON_ONCE(iter->leftover);
3646
3647 (*pos)++;
3648
3649 /* can't go backwards */
3650 if (iter->idx > i)
3651 return NULL;
3652
3653 if (iter->idx < 0)
3654 ent = trace_find_next_entry_inc(iter);
3655 else
3656 ent = iter;
3657
3658 while (ent && iter->idx < i)
3659 ent = trace_find_next_entry_inc(iter);
3660
3661 iter->pos = *pos;
3662
3663 return ent;
3664 }
3665
3666 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3667 {
3668 struct ring_buffer_iter *buf_iter;
3669 unsigned long entries = 0;
3670 u64 ts;
3671
3672 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3673
3674 buf_iter = trace_buffer_iter(iter, cpu);
3675 if (!buf_iter)
3676 return;
3677
3678 ring_buffer_iter_reset(buf_iter);
3679
3680 /*
3681 * With the max latency tracers it is possible that a reset
3682 * never took place on a CPU. This is evident when the
3683 * timestamp is before the start of the buffer.
3684 */
3685 while (ring_buffer_iter_peek(buf_iter, &ts)) {
3686 if (ts >= iter->array_buffer->time_start)
3687 break;
3688 entries++;
3689 ring_buffer_iter_advance(buf_iter);
3690 }
3691
3692 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3693 }
3694
3695 /*
3696 * The current tracer is copied to avoid holding a global lock
3697 * all around.
3698 */
3699 static void *s_start(struct seq_file *m, loff_t *pos)
3700 {
3701 struct trace_iterator *iter = m->private;
3702 struct trace_array *tr = iter->tr;
3703 int cpu_file = iter->cpu_file;
3704 void *p = NULL;
3705 loff_t l = 0;
3706 int cpu;
3707
3708 /*
3709 * Copy the tracer to avoid using a global lock all around.
3710 * iter->trace is a copy of current_trace; the name pointer can
3711 * be compared instead of using strcmp(), as iter->trace->name
3712 * will point to the same string as current_trace->name.
3713 */
3714 mutex_lock(&trace_types_lock);
3715 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
3716 /* Close iter->trace before switching to the new current tracer */
3717 if (iter->trace->close)
3718 iter->trace->close(iter);
3719 *iter->trace = *tr->current_trace;
3720 /* Reopen the new current tracer */
3721 if (iter->trace->open)
3722 iter->trace->open(iter);
3723 }
3724 mutex_unlock(&trace_types_lock);
3725
3726 #ifdef CONFIG_TRACER_MAX_TRACE
3727 if (iter->snapshot && iter->trace->use_max_tr)
3728 return ERR_PTR(-EBUSY);
3729 #endif
3730
3731 if (*pos != iter->pos) {
3732 iter->ent = NULL;
3733 iter->cpu = 0;
3734 iter->idx = -1;
3735
3736 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3737 for_each_tracing_cpu(cpu)
3738 tracing_iter_reset(iter, cpu);
3739 } else
3740 tracing_iter_reset(iter, cpu_file);
3741
3742 iter->leftover = 0;
3743 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3744 ;
3745
3746 } else {
3747 /*
3748 * If we overflowed the seq_file before, then we want
3749 * to just reuse the trace_seq buffer again.
3750 */
3751 if (iter->leftover)
3752 p = iter;
3753 else {
3754 l = *pos - 1;
3755 p = s_next(m, p, &l);
3756 }
3757 }
3758
3759 trace_event_read_lock();
3760 trace_access_lock(cpu_file);
3761 return p;
3762 }
3763
3764 static void s_stop(struct seq_file *m, void *p)
3765 {
3766 struct trace_iterator *iter = m->private;
3767
3768 #ifdef CONFIG_TRACER_MAX_TRACE
3769 if (iter->snapshot && iter->trace->use_max_tr)
3770 return;
3771 #endif
3772
3773 trace_access_unlock(iter->cpu_file);
3774 trace_event_read_unlock();
3775 }
3776
3777 static void
3778 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3779 unsigned long *entries, int cpu)
3780 {
3781 unsigned long count;
3782
3783 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3784 /*
3785 * If this buffer has skipped entries, then we hold all
3786 * entries for the trace and we need to ignore the
3787 * ones before the time stamp.
3788 */
3789 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3790 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3791 /* total is the same as the entries */
3792 *total = count;
3793 } else
3794 *total = count +
3795 ring_buffer_overrun_cpu(buf->buffer, cpu);
3796 *entries = count;
3797 }
3798
3799 static void
3800 get_total_entries(struct array_buffer *buf,
3801 unsigned long *total, unsigned long *entries)
3802 {
3803 unsigned long t, e;
3804 int cpu;
3805
3806 *total = 0;
3807 *entries = 0;
3808
3809 for_each_tracing_cpu(cpu) {
3810 get_total_entries_cpu(buf, &t, &e, cpu);
3811 *total += t;
3812 *entries += e;
3813 }
3814 }
3815
3816 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3817 {
3818 unsigned long total, entries;
3819
3820 if (!tr)
3821 tr = &global_trace;
3822
3823 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3824
3825 return entries;
3826 }
3827
3828 unsigned long trace_total_entries(struct trace_array *tr)
3829 {
3830 unsigned long total, entries;
3831
3832 if (!tr)
3833 tr = &global_trace;
3834
3835 get_total_entries(&tr->array_buffer, &total, &entries);
3836
3837 return entries;
3838 }
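/*
 * Example use of the two accessors above (sketch only): report how
 * many events are currently readable in the global trace and on CPU 0.
 * Passing NULL selects global_trace, as handled above.
 *
 *	unsigned long all  = trace_total_entries(NULL);
 *	unsigned long cpu0 = trace_total_entries_cpu(NULL, 0);
 *
 *	pr_info("trace holds %lu events (%lu on CPU0)\n", all, cpu0);
 */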
3839
3840 static void print_lat_help_header(struct seq_file *m)
3841 {
3842 seq_puts(m, "# _------=> CPU# \n"
3843 "# / _-----=> irqs-off \n"
3844 "# | / _----=> need-resched \n"
3845 "# || / _---=> hardirq/softirq \n"
3846 "# ||| / _--=> preempt-depth \n"
3847 "# |||| / delay \n"
3848 "# cmd pid ||||| time | caller \n"
3849 "# \\ / ||||| \\ | / \n");
3850 }
3851
3852 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3853 {
3854 unsigned long total;
3855 unsigned long entries;
3856
3857 get_total_entries(buf, &total, &entries);
3858 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
3859 entries, total, num_online_cpus());
3860 seq_puts(m, "#\n");
3861 }
3862
3863 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3864 unsigned int flags)
3865 {
3866 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3867
3868 print_event_info(buf, m);
3869
3870 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
3871 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
3872 }
3873
3874 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3875 unsigned int flags)
3876 {
3877 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3878 const char *space = " ";
3879 int prec = tgid ? 12 : 2;
3880
3881 print_event_info(buf, m);
3882
3883 seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
3884 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
3885 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
3886 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
3887 seq_printf(m, "# %.*s||| / delay\n", prec, space);
3888 seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID ");
3889 seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | ");
3890 }
3891
3892 void
3893 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3894 {
3895 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3896 struct array_buffer *buf = iter->array_buffer;
3897 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3898 struct tracer *type = iter->trace;
3899 unsigned long entries;
3900 unsigned long total;
3901 const char *name = "preemption";
3902
3903 name = type->name;
3904
3905 get_total_entries(buf, &total, &entries);
3906
3907 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3908 name, UTS_RELEASE);
3909 seq_puts(m, "# -----------------------------------"
3910 "---------------------------------\n");
3911 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3912 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3913 nsecs_to_usecs(data->saved_latency),
3914 entries,
3915 total,
3916 buf->cpu,
3917 #if defined(CONFIG_PREEMPT_NONE)
3918 "server",
3919 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3920 "desktop",
3921 #elif defined(CONFIG_PREEMPT)
3922 "preempt",
3923 #elif defined(CONFIG_PREEMPT_RT)
3924 "preempt_rt",
3925 #else
3926 "unknown",
3927 #endif
3928 /* These are reserved for later use */
3929 0, 0, 0, 0);
3930 #ifdef CONFIG_SMP
3931 seq_printf(m, " #P:%d)\n", num_online_cpus());
3932 #else
3933 seq_puts(m, ")\n");
3934 #endif
3935 seq_puts(m, "# -----------------\n");
3936 seq_printf(m, "# | task: %.16s-%d "
3937 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3938 data->comm, data->pid,
3939 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3940 data->policy, data->rt_priority);
3941 seq_puts(m, "# -----------------\n");
3942
3943 if (data->critical_start) {
3944 seq_puts(m, "# => started at: ");
3945 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3946 trace_print_seq(m, &iter->seq);
3947 seq_puts(m, "\n# => ended at: ");
3948 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3949 trace_print_seq(m, &iter->seq);
3950 seq_puts(m, "\n#\n");
3951 }
3952
3953 seq_puts(m, "#\n");
3954 }
3955
3956 static void test_cpu_buff_start(struct trace_iterator *iter)
3957 {
3958 struct trace_seq *s = &iter->seq;
3959 struct trace_array *tr = iter->tr;
3960
3961 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3962 return;
3963
3964 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3965 return;
3966
3967 if (cpumask_available(iter->started) &&
3968 cpumask_test_cpu(iter->cpu, iter->started))
3969 return;
3970
3971 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3972 return;
3973
3974 if (cpumask_available(iter->started))
3975 cpumask_set_cpu(iter->cpu, iter->started);
3976
3977 /* Don't print started cpu buffer for the first entry of the trace */
3978 if (iter->idx > 1)
3979 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3980 iter->cpu);
3981 }
3982
3983 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3984 {
3985 struct trace_array *tr = iter->tr;
3986 struct trace_seq *s = &iter->seq;
3987 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3988 struct trace_entry *entry;
3989 struct trace_event *event;
3990
3991 entry = iter->ent;
3992
3993 test_cpu_buff_start(iter);
3994
3995 event = ftrace_find_event(entry->type);
3996
3997 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3998 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3999 trace_print_lat_context(iter);
4000 else
4001 trace_print_context(iter);
4002 }
4003
4004 if (trace_seq_has_overflowed(s))
4005 return TRACE_TYPE_PARTIAL_LINE;
4006
4007 if (event)
4008 return event->funcs->trace(iter, sym_flags, event);
4009
4010 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4011
4012 return trace_handle_return(s);
4013 }
4014
4015 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4016 {
4017 struct trace_array *tr = iter->tr;
4018 struct trace_seq *s = &iter->seq;
4019 struct trace_entry *entry;
4020 struct trace_event *event;
4021
4022 entry = iter->ent;
4023
4024 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4025 trace_seq_printf(s, "%d %d %llu ",
4026 entry->pid, iter->cpu, iter->ts);
4027
4028 if (trace_seq_has_overflowed(s))
4029 return TRACE_TYPE_PARTIAL_LINE;
4030
4031 event = ftrace_find_event(entry->type);
4032 if (event)
4033 return event->funcs->raw(iter, 0, event);
4034
4035 trace_seq_printf(s, "%d ?\n", entry->type);
4036
4037 return trace_handle_return(s);
4038 }
4039
4040 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4041 {
4042 struct trace_array *tr = iter->tr;
4043 struct trace_seq *s = &iter->seq;
4044 unsigned char newline = '\n';
4045 struct trace_entry *entry;
4046 struct trace_event *event;
4047
4048 entry = iter->ent;
4049
4050 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4051 SEQ_PUT_HEX_FIELD(s, entry->pid);
4052 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4053 SEQ_PUT_HEX_FIELD(s, iter->ts);
4054 if (trace_seq_has_overflowed(s))
4055 return TRACE_TYPE_PARTIAL_LINE;
4056 }
4057
4058 event = ftrace_find_event(entry->type);
4059 if (event) {
4060 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4061 if (ret != TRACE_TYPE_HANDLED)
4062 return ret;
4063 }
4064
4065 SEQ_PUT_FIELD(s, newline);
4066
4067 return trace_handle_return(s);
4068 }
4069
4070 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4071 {
4072 struct trace_array *tr = iter->tr;
4073 struct trace_seq *s = &iter->seq;
4074 struct trace_entry *entry;
4075 struct trace_event *event;
4076
4077 entry = iter->ent;
4078
4079 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4080 SEQ_PUT_FIELD(s, entry->pid);
4081 SEQ_PUT_FIELD(s, iter->cpu);
4082 SEQ_PUT_FIELD(s, iter->ts);
4083 if (trace_seq_has_overflowed(s))
4084 return TRACE_TYPE_PARTIAL_LINE;
4085 }
4086
4087 event = ftrace_find_event(entry->type);
4088 return event ? event->funcs->binary(iter, 0, event) :
4089 TRACE_TYPE_HANDLED;
4090 }
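/*
 * The four printers above dispatch through the trace_event returned by
 * ftrace_find_event(). For reference, a hypothetical event would
 * register its output callbacks roughly like this (sketch; the
 * example_output_* handlers are made-up names):
 *
 *	static struct trace_event_functions example_funcs = {
 *		.trace	= example_output_trace,
 *		.raw	= example_output_raw,
 *		.hex	= example_output_hex,
 *		.binary	= example_output_bin,
 *	};
 *
 *	static struct trace_event example_event = {
 *		.funcs	= &example_funcs,
 *	};
 *
 *	// register_trace_event() assigns example_event.type
 *	register_trace_event(&example_event);
 */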
4091
4092 int trace_empty(struct trace_iterator *iter)
4093 {
4094 struct ring_buffer_iter *buf_iter;
4095 int cpu;
4096
4097 /* If we are looking at one CPU buffer, only check that one */
4098 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4099 cpu = iter->cpu_file;
4100 buf_iter = trace_buffer_iter(iter, cpu);
4101 if (buf_iter) {
4102 if (!ring_buffer_iter_empty(buf_iter))
4103 return 0;
4104 } else {
4105 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4106 return 0;
4107 }
4108 return 1;
4109 }
4110
4111 for_each_tracing_cpu(cpu) {
4112 buf_iter = trace_buffer_iter(iter, cpu);
4113 if (buf_iter) {
4114 if (!ring_buffer_iter_empty(buf_iter))
4115 return 0;
4116 } else {
4117 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4118 return 0;
4119 }
4120 }
4121
4122 return 1;
4123 }
4124
4125 /* Called with trace_event_read_lock() held. */
4126 enum print_line_t print_trace_line(struct trace_iterator *iter)
4127 {
4128 struct trace_array *tr = iter->tr;
4129 unsigned long trace_flags = tr->trace_flags;
4130 enum print_line_t ret;
4131
4132 if (iter->lost_events) {
4133 if (iter->lost_events == (unsigned long)-1)
4134 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4135 iter->cpu);
4136 else
4137 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4138 iter->cpu, iter->lost_events);
4139 if (trace_seq_has_overflowed(&iter->seq))
4140 return TRACE_TYPE_PARTIAL_LINE;
4141 }
4142
4143 if (iter->trace && iter->trace->print_line) {
4144 ret = iter->trace->print_line(iter);
4145 if (ret != TRACE_TYPE_UNHANDLED)
4146 return ret;
4147 }
4148
4149 if (iter->ent->type == TRACE_BPUTS &&
4150 trace_flags & TRACE_ITER_PRINTK &&
4151 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4152 return trace_print_bputs_msg_only(iter);
4153
4154 if (iter->ent->type == TRACE_BPRINT &&
4155 trace_flags & TRACE_ITER_PRINTK &&
4156 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4157 return trace_print_bprintk_msg_only(iter);
4158
4159 if (iter->ent->type == TRACE_PRINT &&
4160 trace_flags & TRACE_ITER_PRINTK &&
4161 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4162 return trace_print_printk_msg_only(iter);
4163
4164 if (trace_flags & TRACE_ITER_BIN)
4165 return print_bin_fmt(iter);
4166
4167 if (trace_flags & TRACE_ITER_HEX)
4168 return print_hex_fmt(iter);
4169
4170 if (trace_flags & TRACE_ITER_RAW)
4171 return print_raw_fmt(iter);
4172
4173 return print_trace_fmt(iter);
4174 }
4175
4176 void trace_latency_header(struct seq_file *m)
4177 {
4178 struct trace_iterator *iter = m->private;
4179 struct trace_array *tr = iter->tr;
4180
4181 /* print nothing if the buffers are empty */
4182 if (trace_empty(iter))
4183 return;
4184
4185 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4186 print_trace_header(m, iter);
4187
4188 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4189 print_lat_help_header(m);
4190 }
4191
4192 void trace_default_header(struct seq_file *m)
4193 {
4194 struct trace_iterator *iter = m->private;
4195 struct trace_array *tr = iter->tr;
4196 unsigned long trace_flags = tr->trace_flags;
4197
4198 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4199 return;
4200
4201 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4202 /* print nothing if the buffers are empty */
4203 if (trace_empty(iter))
4204 return;
4205 print_trace_header(m, iter);
4206 if (!(trace_flags & TRACE_ITER_VERBOSE))
4207 print_lat_help_header(m);
4208 } else {
4209 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4210 if (trace_flags & TRACE_ITER_IRQ_INFO)
4211 print_func_help_header_irq(iter->array_buffer,
4212 m, trace_flags);
4213 else
4214 print_func_help_header(iter->array_buffer, m,
4215 trace_flags);
4216 }
4217 }
4218 }
4219
4220 static void test_ftrace_alive(struct seq_file *m)
4221 {
4222 if (!ftrace_is_dead())
4223 return;
4224 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4225 "# MAY BE MISSING FUNCTION EVENTS\n");
4226 }
4227
4228 #ifdef CONFIG_TRACER_MAX_TRACE
4229 static void show_snapshot_main_help(struct seq_file *m)
4230 {
4231 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4232 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4233 "# Takes a snapshot of the main buffer.\n"
4234 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4235 "# (Doesn't have to be '2' works with any number that\n"
4236 "# is not a '0' or '1')\n");
4237 }
4238
4239 static void show_snapshot_percpu_help(struct seq_file *m)
4240 {
4241 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4242 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4243 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4244 "# Takes a snapshot of the main buffer for this cpu.\n");
4245 #else
4246 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4247 "# Must use main snapshot file to allocate.\n");
4248 #endif
4249 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4250 "# (Doesn't have to be '2' works with any number that\n"
4251 "# is not a '0' or '1')\n");
4252 }
4253
4254 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4255 {
4256 if (iter->tr->allocated_snapshot)
4257 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4258 else
4259 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4260
4261 seq_puts(m, "# Snapshot commands:\n");
4262 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4263 show_snapshot_main_help(m);
4264 else
4265 show_snapshot_percpu_help(m);
4266 }
4267 #else
4268 /* Should never be called */
4269 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4270 #endif
4271
4272 static int s_show(struct seq_file *m, void *v)
4273 {
4274 struct trace_iterator *iter = v;
4275 int ret;
4276
4277 if (iter->ent == NULL) {
4278 if (iter->tr) {
4279 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4280 seq_puts(m, "#\n");
4281 test_ftrace_alive(m);
4282 }
4283 if (iter->snapshot && trace_empty(iter))
4284 print_snapshot_help(m, iter);
4285 else if (iter->trace && iter->trace->print_header)
4286 iter->trace->print_header(m);
4287 else
4288 trace_default_header(m);
4289
4290 } else if (iter->leftover) {
4291 /*
4292 * If we filled the seq_file buffer earlier, we
4293 * want to just show it now.
4294 */
4295 ret = trace_print_seq(m, &iter->seq);
4296
4297 /* ret should this time be zero, but you never know */
4298 iter->leftover = ret;
4299
4300 } else {
4301 ret = print_trace_line(iter);
4302 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4303 iter->seq.full = 0;
4304 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4305 }
4306 ret = trace_print_seq(m, &iter->seq);
4307 /*
4308 * If we overflow the seq_file buffer, then it will
4309 * ask us for this data again at start up.
4310 * Use that instead.
4311 * ret is 0 if seq_file write succeeded.
4312 * -1 otherwise.
4313 */
4314 iter->leftover = ret;
4315 }
4316
4317 return 0;
4318 }
4319
4320 /*
4321 * Should be used after trace_array_get(), trace_types_lock
4322 * ensures that i_cdev was already initialized.
4323 */
4324 static inline int tracing_get_cpu(struct inode *inode)
4325 {
4326 if (inode->i_cdev) /* See trace_create_cpu_file() */
4327 return (long)inode->i_cdev - 1;
4328 return RING_BUFFER_ALL_CPUS;
4329 }
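/*
 * The decoding above relies on the convention (set up in
 * trace_create_cpu_file(), not shown here) that a per-CPU file stores
 * "cpu + 1" in i_cdev, so that a NULL i_cdev keeps meaning "no CPU
 * encoded". Sketch of both directions, assuming that convention:
 *
 *	d_inode(dentry)->i_cdev = (void *)(long)(cpu + 1);	// at file creation
 *	cpu = tracing_get_cpu(inode);	// cpu, or RING_BUFFER_ALL_CPUS
 */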
4330
4331 static const struct seq_operations tracer_seq_ops = {
4332 .start = s_start,
4333 .next = s_next,
4334 .stop = s_stop,
4335 .show = s_show,
4336 };
4337
4338 static struct trace_iterator *
4339 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4340 {
4341 struct trace_array *tr = inode->i_private;
4342 struct trace_iterator *iter;
4343 int cpu;
4344
4345 if (tracing_disabled)
4346 return ERR_PTR(-ENODEV);
4347
4348 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4349 if (!iter)
4350 return ERR_PTR(-ENOMEM);
4351
4352 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4353 GFP_KERNEL);
4354 if (!iter->buffer_iter)
4355 goto release;
4356
4357 /*
4358 * trace_find_next_entry() may need to save off iter->ent.
4359 * It will place it into the iter->temp buffer. As most
4360 * events are less than 128 bytes, allocate a buffer of that size.
4361 * If one is greater, then trace_find_next_entry() will
4362 * allocate a new buffer to adjust for the bigger iter->ent.
4363 * It's not critical if it fails to get allocated here.
4364 */
4365 iter->temp = kmalloc(128, GFP_KERNEL);
4366 if (iter->temp)
4367 iter->temp_size = 128;
4368
4369 /*
4370 * We make a copy of the current tracer to avoid concurrent
4371 * changes on it while we are reading.
4372 */
4373 mutex_lock(&trace_types_lock);
4374 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4375 if (!iter->trace)
4376 goto fail;
4377
4378 *iter->trace = *tr->current_trace;
4379
4380 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4381 goto fail;
4382
4383 iter->tr = tr;
4384
4385 #ifdef CONFIG_TRACER_MAX_TRACE
4386 /* Currently only the top directory has a snapshot */
4387 if (tr->current_trace->print_max || snapshot)
4388 iter->array_buffer = &tr->max_buffer;
4389 else
4390 #endif
4391 iter->array_buffer = &tr->array_buffer;
4392 iter->snapshot = snapshot;
4393 iter->pos = -1;
4394 iter->cpu_file = tracing_get_cpu(inode);
4395 mutex_init(&iter->mutex);
4396
4397 /* Notify the tracer early; before we stop tracing. */
4398 if (iter->trace->open)
4399 iter->trace->open(iter);
4400
4401 /* Annotate start of buffers if we had overruns */
4402 if (ring_buffer_overruns(iter->array_buffer->buffer))
4403 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4404
4405 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4406 if (trace_clocks[tr->clock_id].in_ns)
4407 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4408
4409 /*
4410 * If pause-on-trace is enabled, then stop the trace while
4411 * dumping, unless this is the "snapshot" file
4412 */
4413 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4414 tracing_stop_tr(tr);
4415
4416 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4417 for_each_tracing_cpu(cpu) {
4418 iter->buffer_iter[cpu] =
4419 ring_buffer_read_prepare(iter->array_buffer->buffer,
4420 cpu, GFP_KERNEL);
4421 }
4422 ring_buffer_read_prepare_sync();
4423 for_each_tracing_cpu(cpu) {
4424 ring_buffer_read_start(iter->buffer_iter[cpu]);
4425 tracing_iter_reset(iter, cpu);
4426 }
4427 } else {
4428 cpu = iter->cpu_file;
4429 iter->buffer_iter[cpu] =
4430 ring_buffer_read_prepare(iter->array_buffer->buffer,
4431 cpu, GFP_KERNEL);
4432 ring_buffer_read_prepare_sync();
4433 ring_buffer_read_start(iter->buffer_iter[cpu]);
4434 tracing_iter_reset(iter, cpu);
4435 }
4436
4437 mutex_unlock(&trace_types_lock);
4438
4439 return iter;
4440
4441 fail:
4442 mutex_unlock(&trace_types_lock);
4443 kfree(iter->trace);
4444 kfree(iter->temp);
4445 kfree(iter->buffer_iter);
4446 release:
4447 seq_release_private(inode, file);
4448 return ERR_PTR(-ENOMEM);
4449 }
4450
4451 int tracing_open_generic(struct inode *inode, struct file *filp)
4452 {
4453 int ret;
4454
4455 ret = tracing_check_open_get_tr(NULL);
4456 if (ret)
4457 return ret;
4458
4459 filp->private_data = inode->i_private;
4460 return 0;
4461 }
4462
4463 bool tracing_is_disabled(void)
4464 {
4465 return tracing_disabled;
4466 }
4467
4468 /*
4469 * Open and update trace_array ref count.
4470 * Must have the current trace_array passed to it.
4471 */
4472 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4473 {
4474 struct trace_array *tr = inode->i_private;
4475 int ret;
4476
4477 ret = tracing_check_open_get_tr(tr);
4478 if (ret)
4479 return ret;
4480
4481 filp->private_data = inode->i_private;
4482
4483 return 0;
4484 }
4485
4486 /*
4487 * The private pointer of the inode is the trace_event_file.
4488 * Update the tr ref count associated to it.
4489 */
4490 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4491 {
4492 struct trace_event_file *file = inode->i_private;
4493 int ret;
4494
4495 ret = tracing_check_open_get_tr(file->tr);
4496 if (ret)
4497 return ret;
4498
4499 filp->private_data = inode->i_private;
4500
4501 return 0;
4502 }
4503
4504 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4505 {
4506 struct trace_event_file *file = inode->i_private;
4507
4508 trace_array_put(file->tr);
4509
4510 return 0;
4511 }
4512
4513 static int tracing_release(struct inode *inode, struct file *file)
4514 {
4515 struct trace_array *tr = inode->i_private;
4516 struct seq_file *m = file->private_data;
4517 struct trace_iterator *iter;
4518 int cpu;
4519
4520 if (!(file->f_mode & FMODE_READ)) {
4521 trace_array_put(tr);
4522 return 0;
4523 }
4524
4525 /* Writes do not use seq_file */
4526 iter = m->private;
4527 mutex_lock(&trace_types_lock);
4528
4529 for_each_tracing_cpu(cpu) {
4530 if (iter->buffer_iter[cpu])
4531 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4532 }
4533
4534 if (iter->trace && iter->trace->close)
4535 iter->trace->close(iter);
4536
4537 if (!iter->snapshot && tr->stop_count)
4538 /* reenable tracing if it was previously enabled */
4539 tracing_start_tr(tr);
4540
4541 __trace_array_put(tr);
4542
4543 mutex_unlock(&trace_types_lock);
4544
4545 mutex_destroy(&iter->mutex);
4546 free_cpumask_var(iter->started);
4547 kfree(iter->temp);
4548 kfree(iter->trace);
4549 kfree(iter->buffer_iter);
4550 seq_release_private(inode, file);
4551
4552 return 0;
4553 }
4554
4555 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4556 {
4557 struct trace_array *tr = inode->i_private;
4558
4559 trace_array_put(tr);
4560 return 0;
4561 }
4562
4563 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4564 {
4565 struct trace_array *tr = inode->i_private;
4566
4567 trace_array_put(tr);
4568
4569 return single_release(inode, file);
4570 }
4571
4572 static int tracing_open(struct inode *inode, struct file *file)
4573 {
4574 struct trace_array *tr = inode->i_private;
4575 struct trace_iterator *iter;
4576 int ret;
4577
4578 ret = tracing_check_open_get_tr(tr);
4579 if (ret)
4580 return ret;
4581
4582 /* If this file was open for write, then erase contents */
4583 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4584 int cpu = tracing_get_cpu(inode);
4585 struct array_buffer *trace_buf = &tr->array_buffer;
4586
4587 #ifdef CONFIG_TRACER_MAX_TRACE
4588 if (tr->current_trace->print_max)
4589 trace_buf = &tr->max_buffer;
4590 #endif
4591
4592 if (cpu == RING_BUFFER_ALL_CPUS)
4593 tracing_reset_online_cpus(trace_buf);
4594 else
4595 tracing_reset_cpu(trace_buf, cpu);
4596 }
4597
4598 if (file->f_mode & FMODE_READ) {
4599 iter = __tracing_open(inode, file, false);
4600 if (IS_ERR(iter))
4601 ret = PTR_ERR(iter);
4602 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4603 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4604 }
4605
4606 if (ret < 0)
4607 trace_array_put(tr);
4608
4609 return ret;
4610 }
4611
4612 /*
4613 * Some tracers are not suitable for instance buffers.
4614 * A tracer is always available for the global array (toplevel)
4615 * or if it explicitly states that it is.
4616 */
4617 static bool
4618 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4619 {
4620 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4621 }
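/*
 * Illustrative sketch only: a tracer that wants to be usable inside
 * instances/<name> opts in with ->allow_instances; otherwise it is
 * offered only for the top-level (global) trace array. The name and
 * callbacks below are hypothetical.
 *
 *	static struct tracer example_tracer = {
 *		.name			= "example",
 *		.init			= example_tracer_init,
 *		.reset			= example_tracer_reset,
 *		.allow_instances	= true,
 *	};
 */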
4622
4623 /* Find the next tracer that this trace array may use */
4624 static struct tracer *
4625 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4626 {
4627 while (t && !trace_ok_for_array(t, tr))
4628 t = t->next;
4629
4630 return t;
4631 }
4632
4633 static void *
4634 t_next(struct seq_file *m, void *v, loff_t *pos)
4635 {
4636 struct trace_array *tr = m->private;
4637 struct tracer *t = v;
4638
4639 (*pos)++;
4640
4641 if (t)
4642 t = get_tracer_for_array(tr, t->next);
4643
4644 return t;
4645 }
4646
4647 static void *t_start(struct seq_file *m, loff_t *pos)
4648 {
4649 struct trace_array *tr = m->private;
4650 struct tracer *t;
4651 loff_t l = 0;
4652
4653 mutex_lock(&trace_types_lock);
4654
4655 t = get_tracer_for_array(tr, trace_types);
4656 for (; t && l < *pos; t = t_next(m, t, &l))
4657 ;
4658
4659 return t;
4660 }
4661
4662 static void t_stop(struct seq_file *m, void *p)
4663 {
4664 mutex_unlock(&trace_types_lock);
4665 }
4666
4667 static int t_show(struct seq_file *m, void *v)
4668 {
4669 struct tracer *t = v;
4670
4671 if (!t)
4672 return 0;
4673
4674 seq_puts(m, t->name);
4675 if (t->next)
4676 seq_putc(m, ' ');
4677 else
4678 seq_putc(m, '\n');
4679
4680 return 0;
4681 }
4682
4683 static const struct seq_operations show_traces_seq_ops = {
4684 .start = t_start,
4685 .next = t_next,
4686 .stop = t_stop,
4687 .show = t_show,
4688 };
4689
4690 static int show_traces_open(struct inode *inode, struct file *file)
4691 {
4692 struct trace_array *tr = inode->i_private;
4693 struct seq_file *m;
4694 int ret;
4695
4696 ret = tracing_check_open_get_tr(tr);
4697 if (ret)
4698 return ret;
4699
4700 ret = seq_open(file, &show_traces_seq_ops);
4701 if (ret) {
4702 trace_array_put(tr);
4703 return ret;
4704 }
4705
4706 m = file->private_data;
4707 m->private = tr;
4708
4709 return 0;
4710 }
4711
4712 static int show_traces_release(struct inode *inode, struct file *file)
4713 {
4714 struct trace_array *tr = inode->i_private;
4715
4716 trace_array_put(tr);
4717 return seq_release(inode, file);
4718 }
4719
4720 static ssize_t
4721 tracing_write_stub(struct file *filp, const char __user *ubuf,
4722 size_t count, loff_t *ppos)
4723 {
4724 return count;
4725 }
4726
4727 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4728 {
4729 int ret;
4730
4731 if (file->f_mode & FMODE_READ)
4732 ret = seq_lseek(file, offset, whence);
4733 else
4734 file->f_pos = ret = 0;
4735
4736 return ret;
4737 }
4738
4739 static const struct file_operations tracing_fops = {
4740 .open = tracing_open,
4741 .read = seq_read,
4742 .read_iter = seq_read_iter,
4743 .splice_read = generic_file_splice_read,
4744 .write = tracing_write_stub,
4745 .llseek = tracing_lseek,
4746 .release = tracing_release,
4747 };
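/*
 * These fops are wired to a tracefs entry elsewhere in this file,
 * along the lines of the following sketch (the real call site lives in
 * the tracefs setup code; the mode and name shown here are illustrative):
 *
 *	trace_create_file("trace", 0644, d_tracer, tr, &tracing_fops);
 */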
4748
4749 static const struct file_operations show_traces_fops = {
4750 .open = show_traces_open,
4751 .read = seq_read,
4752 .llseek = seq_lseek,
4753 .release = show_traces_release,
4754 };
4755
4756 static ssize_t
4757 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4758 size_t count, loff_t *ppos)
4759 {
4760 struct trace_array *tr = file_inode(filp)->i_private;
4761 char *mask_str;
4762 int len;
4763
4764 len = snprintf(NULL, 0, "%*pb\n",
4765 cpumask_pr_args(tr->tracing_cpumask)) + 1;
4766 mask_str = kmalloc(len, GFP_KERNEL);
4767 if (!mask_str)
4768 return -ENOMEM;
4769
4770 len = snprintf(mask_str, len, "%*pb\n",
4771 cpumask_pr_args(tr->tracing_cpumask));
4772 if (len >= count) {
4773 count = -EINVAL;
4774 goto out_err;
4775 }
4776 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4777
4778 out_err:
4779 kfree(mask_str);
4780
4781 return count;
4782 }
4783
4784 int tracing_set_cpumask(struct trace_array *tr,
4785 cpumask_var_t tracing_cpumask_new)
4786 {
4787 int cpu;
4788
4789 if (!tr)
4790 return -EINVAL;
4791
4792 local_irq_disable();
4793 arch_spin_lock(&tr->max_lock);
4794 for_each_tracing_cpu(cpu) {
4795 /*
4796 * Increase/decrease the disabled counter if we are
4797 * about to flip a bit in the cpumask:
4798 */
4799 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4800 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4801 atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4802 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4803 #ifdef CONFIG_TRACER_MAX_TRACE
4804 ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
4805 #endif
4806 }
4807 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4808 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4809 atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4810 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4811 #ifdef CONFIG_TRACER_MAX_TRACE
4812 ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
4813 #endif
4814 }
4815 }
4816 arch_spin_unlock(&tr->max_lock);
4817 local_irq_enable();
4818
4819 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4820
4821 return 0;
4822 }
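/*
 * Sketch of calling the helper above from kernel code, given a
 * struct trace_array *tr (the write handler below does the same thing
 * starting from a user buffer). The CPU ids are arbitrary examples.
 *
 *	cpumask_var_t new_mask;
 *	int err;
 *
 *	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
 *		return -ENOMEM;
 *	cpumask_clear(new_mask);
 *	cpumask_set_cpu(0, new_mask);
 *	cpumask_set_cpu(2, new_mask);
 *	err = tracing_set_cpumask(tr, new_mask);	// trace only CPUs 0 and 2
 *	free_cpumask_var(new_mask);
 */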
4823
4824 static ssize_t
4825 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4826 size_t count, loff_t *ppos)
4827 {
4828 struct trace_array *tr = file_inode(filp)->i_private;
4829 cpumask_var_t tracing_cpumask_new;
4830 int err;
4831
4832 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4833 return -ENOMEM;
4834
4835 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4836 if (err)
4837 goto err_free;
4838
4839 err = tracing_set_cpumask(tr, tracing_cpumask_new);
4840 if (err)
4841 goto err_free;
4842
4843 free_cpumask_var(tracing_cpumask_new);
4844
4845 return count;
4846
4847 err_free:
4848 free_cpumask_var(tracing_cpumask_new);
4849
4850 return err;
4851 }
4852
4853 static const struct file_operations tracing_cpumask_fops = {
4854 .open = tracing_open_generic_tr,
4855 .read = tracing_cpumask_read,
4856 .write = tracing_cpumask_write,
4857 .release = tracing_release_generic_tr,
4858 .llseek = generic_file_llseek,
4859 };
4860
4861 static int tracing_trace_options_show(struct seq_file *m, void *v)
4862 {
4863 struct tracer_opt *trace_opts;
4864 struct trace_array *tr = m->private;
4865 u32 tracer_flags;
4866 int i;
4867
4868 mutex_lock(&trace_types_lock);
4869 tracer_flags = tr->current_trace->flags->val;
4870 trace_opts = tr->current_trace->flags->opts;
4871
4872 for (i = 0; trace_options[i]; i++) {
4873 if (tr->trace_flags & (1 << i))
4874 seq_printf(m, "%s\n", trace_options[i]);
4875 else
4876 seq_printf(m, "no%s\n", trace_options[i]);
4877 }
4878
4879 for (i = 0; trace_opts[i].name; i++) {
4880 if (tracer_flags & trace_opts[i].bit)
4881 seq_printf(m, "%s\n", trace_opts[i].name);
4882 else
4883 seq_printf(m, "no%s\n", trace_opts[i].name);
4884 }
4885 mutex_unlock(&trace_types_lock);
4886
4887 return 0;
4888 }
4889
4890 static int __set_tracer_option(struct trace_array *tr,
4891 struct tracer_flags *tracer_flags,
4892 struct tracer_opt *opts, int neg)
4893 {
4894 struct tracer *trace = tracer_flags->trace;
4895 int ret;
4896
4897 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4898 if (ret)
4899 return ret;
4900
4901 if (neg)
4902 tracer_flags->val &= ~opts->bit;
4903 else
4904 tracer_flags->val |= opts->bit;
4905 return 0;
4906 }
4907
4908 /* Try to assign a tracer specific option */
4909 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4910 {
4911 struct tracer *trace = tr->current_trace;
4912 struct tracer_flags *tracer_flags = trace->flags;
4913 struct tracer_opt *opts = NULL;
4914 int i;
4915
4916 for (i = 0; tracer_flags->opts[i].name; i++) {
4917 opts = &tracer_flags->opts[i];
4918
4919 if (strcmp(cmp, opts->name) == 0)
4920 return __set_tracer_option(tr, trace->flags, opts, neg);
4921 }
4922
4923 return -EINVAL;
4924 }
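/*
 * For reference, a hypothetical tracer-specific option set that
 * set_tracer_option() above would match against (sketch; the names are
 * made up, the structures are the real tracer_opt/tracer_flags):
 *
 *	#define EXAMPLE_OPT_VERBOSE	0x1
 *
 *	static struct tracer_opt example_opts[] = {
 *		{ TRACER_OPT(verbose, EXAMPLE_OPT_VERBOSE) },
 *		{ }	// NULL name terminates the loop above
 *	};
 *
 *	static struct tracer_flags example_flags = {
 *		.val	= 0,
 *		.opts	= example_opts,
 *	};
 *
 * The tracer points at this with .flags = &example_flags; writing
 * "verbose" or "noverbose" to trace_options then flips the bit via
 * __set_tracer_option().
 */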
4925
4926 /* Some tracers require overwrite to stay enabled */
4927 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4928 {
4929 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4930 return -1;
4931
4932 return 0;
4933 }
4934
4935 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4936 {
4937 int *map;
4938
4939 if ((mask == TRACE_ITER_RECORD_TGID) ||
4940 (mask == TRACE_ITER_RECORD_CMD))
4941 lockdep_assert_held(&event_mutex);
4942
4943 /* do nothing if flag is already set */
4944 if (!!(tr->trace_flags & mask) == !!enabled)
4945 return 0;
4946
4947 /* Give the tracer a chance to approve the change */
4948 if (tr->current_trace->flag_changed)
4949 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4950 return -EINVAL;
4951
4952 if (enabled)
4953 tr->trace_flags |= mask;
4954 else
4955 tr->trace_flags &= ~mask;
4956
4957 if (mask == TRACE_ITER_RECORD_CMD)
4958 trace_event_enable_cmd_record(enabled);
4959
4960 if (mask == TRACE_ITER_RECORD_TGID) {
4961 if (!tgid_map) {
4962 tgid_map_max = pid_max;
4963 map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
4964 GFP_KERNEL);
4965
4966 /*
4967 * Pairs with smp_load_acquire() in
4968 * trace_find_tgid_ptr() to ensure that if it observes
4969 * the tgid_map we just allocated then it also observes
4970 * the corresponding tgid_map_max value.
4971 */
4972 smp_store_release(&tgid_map, map);
4973 }
4974 if (!tgid_map) {
4975 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4976 return -ENOMEM;
4977 }
4978
4979 trace_event_enable_tgid_record(enabled);
4980 }
4981
4982 if (mask == TRACE_ITER_EVENT_FORK)
4983 trace_event_follow_fork(tr, enabled);
4984
4985 if (mask == TRACE_ITER_FUNC_FORK)
4986 ftrace_pid_follow_fork(tr, enabled);
4987
4988 if (mask == TRACE_ITER_OVERWRITE) {
4989 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4990 #ifdef CONFIG_TRACER_MAX_TRACE
4991 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4992 #endif
4993 }
4994
4995 if (mask == TRACE_ITER_PRINTK) {
4996 trace_printk_start_stop_comm(enabled);
4997 trace_printk_control(enabled);
4998 }
4999
5000 return 0;
5001 }
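/*
 * Sketch of flipping a core flag from kernel code. trace_set_options()
 * below takes the two locks in this order, and the RECORD_CMD/TGID
 * cases above additionally assert that event_mutex is held:
 *
 *	mutex_lock(&event_mutex);
 *	mutex_lock(&trace_types_lock);
 *	ret = set_tracer_flag(tr, TRACE_ITER_RECORD_TGID, 1);
 *	mutex_unlock(&trace_types_lock);
 *	mutex_unlock(&event_mutex);
 */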
5002
5003 int trace_set_options(struct trace_array *tr, char *option)
5004 {
5005 char *cmp;
5006 int neg = 0;
5007 int ret;
5008 size_t orig_len = strlen(option);
5009 int len;
5010
5011 cmp = strstrip(option);
5012
5013 len = str_has_prefix(cmp, "no");
5014 if (len)
5015 neg = 1;
5016
5017 cmp += len;
5018
5019 mutex_lock(&event_mutex);
5020 mutex_lock(&trace_types_lock);
5021
5022 ret = match_string(trace_options, -1, cmp);
5023 /* If no option could be set, test the specific tracer options */
5024 if (ret < 0)
5025 ret = set_tracer_option(tr, cmp, neg);
5026 else
5027 ret = set_tracer_flag(tr, 1 << ret, !neg);
5028
5029 mutex_unlock(&trace_types_lock);
5030 mutex_unlock(&event_mutex);
5031
5032 /*
5033 * If the first trailing whitespace is replaced with '\0' by strstrip,
5034 * turn it back into a space.
5035 */
5036 if (orig_len > strlen(option))
5037 option[strlen(option)] = ' ';
5038
5039 return ret;
5040 }
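/*
 * Example (sketch): because of the "no" prefix handling above, both of
 * the following are valid option strings. The buffers are writable
 * because strstrip() may modify them.
 *
 *	char on[]  = "overwrite";
 *	char off[] = "nooverwrite";
 *
 *	trace_set_options(&global_trace, on);	// set TRACE_ITER_OVERWRITE
 *	trace_set_options(&global_trace, off);	// clear it again
 *
 * Unknown names fall through to the current tracer's private options.
 */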
5041
5042 static void __init apply_trace_boot_options(void)
5043 {
5044 char *buf = trace_boot_options_buf;
5045 char *option;
5046
5047 while (true) {
5048 option = strsep(&buf, ",");
5049
5050 if (!option)
5051 break;
5052
5053 if (*option)
5054 trace_set_options(&global_trace, option);
5055
5056 /* Put back the comma to allow this to be called again */
5057 if (buf)
5058 *(buf - 1) = ',';
5059 }
5060 }
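/*
 * The buffer parsed above is filled from the "trace_options=" kernel
 * command line parameter, for example (illustrative values):
 *
 *	trace_options=sym-addr,nooverwrite,stacktrace
 *
 * Each comma-separated token is handed to trace_set_options() exactly
 * as if it had been written to the trace_options file.
 */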
5061
5062 static ssize_t
5063 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5064 size_t cnt, loff_t *ppos)
5065 {
5066 struct seq_file *m = filp->private_data;
5067 struct trace_array *tr = m->private;
5068 char buf[64];
5069 int ret;
5070
5071 if (cnt >= sizeof(buf))
5072 return -EINVAL;
5073
5074 if (copy_from_user(buf, ubuf, cnt))
5075 return -EFAULT;
5076
5077 buf[cnt] = 0;
5078
5079 ret = trace_set_options(tr, buf);
5080 if (ret < 0)
5081 return ret;
5082
5083 *ppos += cnt;
5084
5085 return cnt;
5086 }
5087
5088 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5089 {
5090 struct trace_array *tr = inode->i_private;
5091 int ret;
5092
5093 ret = tracing_check_open_get_tr(tr);
5094 if (ret)
5095 return ret;
5096
5097 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5098 if (ret < 0)
5099 trace_array_put(tr);
5100
5101 return ret;
5102 }
5103
5104 static const struct file_operations tracing_iter_fops = {
5105 .open = tracing_trace_options_open,
5106 .read = seq_read,
5107 .llseek = seq_lseek,
5108 .release = tracing_single_release_tr,
5109 .write = tracing_trace_options_write,
5110 };
5111
5112 static const char readme_msg[] =
5113 "tracing mini-HOWTO:\n\n"
5114 "# echo 0 > tracing_on : quick way to disable tracing\n"
5115 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5116 " Important files:\n"
5117 " trace\t\t\t- The static contents of the buffer\n"
5118 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5119 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5120 " current_tracer\t- function and latency tracers\n"
5121 " available_tracers\t- list of configured tracers for current_tracer\n"
5122 " error_log\t- error log for failed commands (that support it)\n"
5123 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5124 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5125 " trace_clock\t\t-change the clock used to order events\n"
5126 " local: Per cpu clock but may not be synced across CPUs\n"
5127 " global: Synced across CPUs but slows tracing down.\n"
5128 " counter: Not a clock, but just an increment\n"
5129 " uptime: Jiffy counter from time of boot\n"
5130 " perf: Same clock that perf events use\n"
5131 #ifdef CONFIG_X86_64
5132 " x86-tsc: TSC cycle counter\n"
5133 #endif
5134 "\n timestamp_mode\t-view the mode used to timestamp events\n"
5135 " delta: Delta difference against a buffer-wide timestamp\n"
5136 " absolute: Absolute (standalone) timestamp\n"
5137 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5138 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5139 " tracing_cpumask\t- Limit which CPUs to trace\n"
5140 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5141 "\t\t\t Remove sub-buffer with rmdir\n"
5142 " trace_options\t\t- Set format or modify how tracing happens\n"
5143 "\t\t\t Disable an option by prefixing 'no' to the\n"
5144 "\t\t\t option name\n"
5145 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5146 #ifdef CONFIG_DYNAMIC_FTRACE
5147 "\n available_filter_functions - list of functions that can be filtered on\n"
5148 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5149 "\t\t\t functions\n"
5150 "\t accepts: func_full_name or glob-matching-pattern\n"
5151 "\t modules: Can select a group via module\n"
5152 "\t Format: :mod:<module-name>\n"
5153 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5154 "\t triggers: a command to perform when function is hit\n"
5155 "\t Format: <function>:<trigger>[:count]\n"
5156 "\t trigger: traceon, traceoff\n"
5157 "\t\t enable_event:<system>:<event>\n"
5158 "\t\t disable_event:<system>:<event>\n"
5159 #ifdef CONFIG_STACKTRACE
5160 "\t\t stacktrace\n"
5161 #endif
5162 #ifdef CONFIG_TRACER_SNAPSHOT
5163 "\t\t snapshot\n"
5164 #endif
5165 "\t\t dump\n"
5166 "\t\t cpudump\n"
5167 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5168 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5169 "\t The first one will disable tracing every time do_fault is hit\n"
5170 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5171 "\t The first time do trap is hit and it disables tracing, the\n"
5172 "\t counter will decrement to 2. If tracing is already disabled,\n"
5173 "\t the counter will not decrement. It only decrements when the\n"
5174 "\t trigger did work\n"
5175 "\t To remove trigger without count:\n"
5176 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
5177 "\t To remove trigger with a count:\n"
5178 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5179 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5180 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5181 "\t modules: Can select a group via module command :mod:\n"
5182 "\t Does not accept triggers\n"
5183 #endif /* CONFIG_DYNAMIC_FTRACE */
5184 #ifdef CONFIG_FUNCTION_TRACER
5185 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5186 "\t\t (function)\n"
5187 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5188 "\t\t (function)\n"
5189 #endif
5190 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5191 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5192 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5193 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5194 #endif
5195 #ifdef CONFIG_TRACER_SNAPSHOT
5196 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5197 "\t\t\t snapshot buffer. Read the contents for more\n"
5198 "\t\t\t information\n"
5199 #endif
5200 #ifdef CONFIG_STACK_TRACER
5201 " stack_trace\t\t- Shows the max stack trace when active\n"
5202 " stack_max_size\t- Shows current max stack size that was traced\n"
5203 "\t\t\t Write into this file to reset the max size (trigger a\n"
5204 "\t\t\t new trace)\n"
5205 #ifdef CONFIG_DYNAMIC_FTRACE
5206 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5207 "\t\t\t traces\n"
5208 #endif
5209 #endif /* CONFIG_STACK_TRACER */
5210 #ifdef CONFIG_DYNAMIC_EVENTS
5211 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5212 "\t\t\t Write into this file to define/undefine new trace events.\n"
5213 #endif
5214 #ifdef CONFIG_KPROBE_EVENTS
5215 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5216 "\t\t\t Write into this file to define/undefine new trace events.\n"
5217 #endif
5218 #ifdef CONFIG_UPROBE_EVENTS
5219 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5220 "\t\t\t Write into this file to define/undefine new trace events.\n"
5221 #endif
5222 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5223 "\t accepts: event-definitions (one definition per line)\n"
5224 "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
5225 "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5226 #ifdef CONFIG_HIST_TRIGGERS
5227 "\t s:[synthetic/]<event> <field> [<field>]\n"
5228 #endif
5229 "\t -:[<group>/]<event>\n"
5230 #ifdef CONFIG_KPROBE_EVENTS
5231 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5232 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5233 #endif
5234 #ifdef CONFIG_UPROBE_EVENTS
5235 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5236 #endif
5237 "\t args: <name>=fetcharg[:type]\n"
5238 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5239 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5240 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5241 #else
5242 "\t $stack<index>, $stack, $retval, $comm,\n"
5243 #endif
5244 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5245 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5246 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5247 "\t <type>\\[<array-size>\\]\n"
5248 #ifdef CONFIG_HIST_TRIGGERS
5249 "\t field: <stype> <name>;\n"
5250 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5251 "\t [unsigned] char/int/long\n"
5252 #endif
5253 #endif
5254 " events/\t\t- Directory containing all trace event subsystems:\n"
5255 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5256 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5257 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5258 "\t\t\t events\n"
5259 " filter\t\t- If set, only events passing filter are traced\n"
5260 " events/<system>/<event>/\t- Directory containing control files for\n"
5261 "\t\t\t <event>:\n"
5262 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5263 " filter\t\t- If set, only events passing filter are traced\n"
5264 " trigger\t\t- If set, a command to perform when event is hit\n"
5265 "\t Format: <trigger>[:count][if <filter>]\n"
5266 "\t trigger: traceon, traceoff\n"
5267 "\t enable_event:<system>:<event>\n"
5268 "\t disable_event:<system>:<event>\n"
5269 #ifdef CONFIG_HIST_TRIGGERS
5270 "\t enable_hist:<system>:<event>\n"
5271 "\t disable_hist:<system>:<event>\n"
5272 #endif
5273 #ifdef CONFIG_STACKTRACE
5274 "\t\t stacktrace\n"
5275 #endif
5276 #ifdef CONFIG_TRACER_SNAPSHOT
5277 "\t\t snapshot\n"
5278 #endif
5279 #ifdef CONFIG_HIST_TRIGGERS
5280 "\t\t hist (see below)\n"
5281 #endif
5282 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5283 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5284 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5285 "\t events/block/block_unplug/trigger\n"
5286 "\t The first disables tracing every time block_unplug is hit.\n"
5287 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5288 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5289 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5290 "\t Like function triggers, the counter is only decremented if it\n"
5291 "\t enabled or disabled tracing.\n"
5292 "\t To remove a trigger without a count:\n"
5293 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5294 "\t To remove a trigger with a count:\n"
5295 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5296 "\t Filters can be ignored when removing a trigger.\n"
5297 #ifdef CONFIG_HIST_TRIGGERS
5298 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5299 "\t Format: hist:keys=<field1[,field2,...]>\n"
5300 "\t [:values=<field1[,field2,...]>]\n"
5301 "\t [:sort=<field1[,field2,...]>]\n"
5302 "\t [:size=#entries]\n"
5303 "\t [:pause][:continue][:clear]\n"
5304 "\t [:name=histname1]\n"
5305 "\t [:<handler>.<action>]\n"
5306 "\t [if <filter>]\n\n"
5307 "\t Note, special fields can be used as well:\n"
5308 "\t common_timestamp - to record current timestamp\n"
5309 "\t common_cpu - to record the CPU the event happened on\n"
5310 "\n"
5311 "\t When a matching event is hit, an entry is added to a hash\n"
5312 "\t table using the key(s) and value(s) named, and the value of a\n"
5313 "\t sum called 'hitcount' is incremented. Keys and values\n"
5314 "\t correspond to fields in the event's format description. Keys\n"
5315 "\t can be any field, or the special string 'stacktrace'.\n"
5316 "\t Compound keys consisting of up to two fields can be specified\n"
5317 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5318 "\t fields. Sort keys consisting of up to two fields can be\n"
5319 "\t specified using the 'sort' keyword. The sort direction can\n"
5320 "\t be modified by appending '.descending' or '.ascending' to a\n"
5321 "\t sort field. The 'size' parameter can be used to specify more\n"
5322 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5323 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5324 "\t its histogram data will be shared with other triggers of the\n"
5325 "\t same name, and trigger hits will update this common data.\n\n"
5326 "\t Reading the 'hist' file for the event will dump the hash\n"
5327 "\t table in its entirety to stdout. If there are multiple hist\n"
5328 "\t triggers attached to an event, there will be a table for each\n"
5329 "\t trigger in the output. The table displayed for a named\n"
5330 "\t trigger will be the same as any other instance having the\n"
5331 "\t same name. The default format used to display a given field\n"
5332 "\t can be modified by appending any of the following modifiers\n"
5333 "\t to the field name, as applicable:\n\n"
5334 "\t .hex display a number as a hex value\n"
5335 "\t .sym display an address as a symbol\n"
5336 "\t .sym-offset display an address as a symbol and offset\n"
5337 "\t .execname display a common_pid as a program name\n"
5338 "\t .syscall display a syscall id as a syscall name\n"
5339 "\t .log2 display log2 value rather than raw number\n"
5340 "\t .usecs display a common_timestamp in microseconds\n\n"
5341 "\t The 'pause' parameter can be used to pause an existing hist\n"
5342 "\t trigger or to start a hist trigger but not log any events\n"
5343 "\t until told to do so. 'continue' can be used to start or\n"
5344 "\t restart a paused hist trigger.\n\n"
5345 "\t The 'clear' parameter will clear the contents of a running\n"
5346 "\t hist trigger and leave its current paused/active state\n"
5347 "\t unchanged.\n\n"
5348 "\t The enable_hist and disable_hist triggers can be used to\n"
5349 "\t have one event conditionally start and stop another event's\n"
5350 "\t already-attached hist trigger. The syntax is analogous to\n"
5351 "\t the enable_event and disable_event triggers.\n\n"
5352 "\t Hist trigger handlers and actions are executed whenever a\n"
5353 "\t histogram entry is added or updated. They take the form:\n\n"
5354 "\t <handler>.<action>\n\n"
5355 "\t The available handlers are:\n\n"
5356 "\t onmatch(matching.event) - invoke on addition or update\n"
5357 "\t onmax(var) - invoke if var exceeds current max\n"
5358 "\t onchange(var) - invoke action if var changes\n\n"
5359 "\t The available actions are:\n\n"
5360 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5361 "\t save(field,...) - save current event fields\n"
5362 #ifdef CONFIG_TRACER_SNAPSHOT
5363 "\t snapshot() - snapshot the trace buffer\n\n"
5364 #endif
5365 #ifdef CONFIG_SYNTH_EVENTS
5366 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5367 "\t Write into this file to define/undefine new synthetic events.\n"
5368 "\t example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5369 #endif
5370 #endif
5371 ;
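
/*
 * Illustrative example only (a sketch combining the pieces documented in
 * the help text above, not an excerpt from it): a hist trigger keyed on
 * the call stack, summing a numeric field and sorted by hit count, could
 * be set up from user space with something like:
 *
 *	echo 'hist:keys=stacktrace:values=bytes_req:sort=hitcount' > \
 *		events/kmem/kmalloc/trigger
 *
 * The event (kmem/kmalloc) and the field (bytes_req) are assumptions
 * chosen for illustration; any event and numeric field listed in its
 * format file can be used instead.
 */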
5372
5373 static ssize_t
5374 tracing_readme_read(struct file *filp, char __user *ubuf,
5375 size_t cnt, loff_t *ppos)
5376 {
5377 return simple_read_from_buffer(ubuf, cnt, ppos,
5378 readme_msg, strlen(readme_msg));
5379 }
5380
5381 static const struct file_operations tracing_readme_fops = {
5382 .open = tracing_open_generic,
5383 .read = tracing_readme_read,
5384 .llseek = generic_file_llseek,
5385 };
5386
5387 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5388 {
5389 int pid = ++(*pos);
5390
5391 return trace_find_tgid_ptr(pid);
5392 }
5393
5394 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5395 {
5396 int pid = *pos;
5397
5398 return trace_find_tgid_ptr(pid);
5399 }
5400
5401 static void saved_tgids_stop(struct seq_file *m, void *v)
5402 {
5403 }
5404
5405 static int saved_tgids_show(struct seq_file *m, void *v)
5406 {
5407 int *entry = (int *)v;
5408 int pid = entry - tgid_map;
5409 int tgid = *entry;
5410
5411 if (tgid == 0)
5412 return SEQ_SKIP;
5413
5414 seq_printf(m, "%d %d\n", pid, tgid);
5415 return 0;
5416 }
5417
5418 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5419 .start = saved_tgids_start,
5420 .stop = saved_tgids_stop,
5421 .next = saved_tgids_next,
5422 .show = saved_tgids_show,
5423 };
5424
5425 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5426 {
5427 int ret;
5428
5429 ret = tracing_check_open_get_tr(NULL);
5430 if (ret)
5431 return ret;
5432
5433 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5434 }
5435
5436
5437 static const struct file_operations tracing_saved_tgids_fops = {
5438 .open = tracing_saved_tgids_open,
5439 .read = seq_read,
5440 .llseek = seq_lseek,
5441 .release = seq_release,
5442 };
5443
5444 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5445 {
5446 unsigned int *ptr = v;
5447
5448 if (*pos || m->count)
5449 ptr++;
5450
5451 (*pos)++;
5452
5453 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5454 ptr++) {
5455 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5456 continue;
5457
5458 return ptr;
5459 }
5460
5461 return NULL;
5462 }
5463
5464 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5465 {
5466 void *v;
5467 loff_t l = 0;
5468
5469 preempt_disable();
5470 arch_spin_lock(&trace_cmdline_lock);
5471
5472 v = &savedcmd->map_cmdline_to_pid[0];
5473 while (l <= *pos) {
5474 v = saved_cmdlines_next(m, v, &l);
5475 if (!v)
5476 return NULL;
5477 }
5478
5479 return v;
5480 }
5481
5482 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5483 {
5484 arch_spin_unlock(&trace_cmdline_lock);
5485 preempt_enable();
5486 }
5487
5488 static int saved_cmdlines_show(struct seq_file *m, void *v)
5489 {
5490 char buf[TASK_COMM_LEN];
5491 unsigned int *pid = v;
5492
5493 __trace_find_cmdline(*pid, buf);
5494 seq_printf(m, "%d %s\n", *pid, buf);
5495 return 0;
5496 }
5497
5498 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5499 .start = saved_cmdlines_start,
5500 .next = saved_cmdlines_next,
5501 .stop = saved_cmdlines_stop,
5502 .show = saved_cmdlines_show,
5503 };
5504
5505 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5506 {
5507 int ret;
5508
5509 ret = tracing_check_open_get_tr(NULL);
5510 if (ret)
5511 return ret;
5512
5513 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5514 }
5515
5516 static const struct file_operations tracing_saved_cmdlines_fops = {
5517 .open = tracing_saved_cmdlines_open,
5518 .read = seq_read,
5519 .llseek = seq_lseek,
5520 .release = seq_release,
5521 };
5522
5523 static ssize_t
5524 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5525 size_t cnt, loff_t *ppos)
5526 {
5527 char buf[64];
5528 int r;
5529
5530 preempt_disable();
5531 arch_spin_lock(&trace_cmdline_lock);
5532 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5533 arch_spin_unlock(&trace_cmdline_lock);
5534 preempt_enable();
5535
5536 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5537 }
5538
5539 static int tracing_resize_saved_cmdlines(unsigned int val)
5540 {
5541 struct saved_cmdlines_buffer *s, *savedcmd_temp;
5542
5543 s = allocate_cmdlines_buffer(val);
5544 if (!s)
5545 return -ENOMEM;
5546
5547 preempt_disable();
5548 arch_spin_lock(&trace_cmdline_lock);
5549 savedcmd_temp = savedcmd;
5550 savedcmd = s;
5551 arch_spin_unlock(&trace_cmdline_lock);
5552 preempt_enable();
5553 free_saved_cmdlines_buffer(savedcmd_temp);
5554
5555 return 0;
5556 }
5557
5558 static ssize_t
5559 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5560 size_t cnt, loff_t *ppos)
5561 {
5562 unsigned long val;
5563 int ret;
5564
5565 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5566 if (ret)
5567 return ret;
5568
5569 /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5570 if (!val || val > PID_MAX_DEFAULT)
5571 return -EINVAL;
5572
5573 ret = tracing_resize_saved_cmdlines((unsigned int)val);
5574 if (ret < 0)
5575 return ret;
5576
5577 *ppos += cnt;
5578
5579 return cnt;
5580 }
5581
5582 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5583 .open = tracing_open_generic,
5584 .read = tracing_saved_cmdlines_size_read,
5585 .write = tracing_saved_cmdlines_size_write,
5586 };
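
/*
 * Usage sketch (assuming these fops back the usual "saved_cmdlines_size"
 * tracefs file):
 *
 *	cat saved_cmdlines_size			# current number of cmdline slots
 *	echo 1024 > saved_cmdlines_size		# grow the pid -> comm cache
 *
 * Writes outside 1..PID_MAX_DEFAULT are rejected with -EINVAL by
 * tracing_saved_cmdlines_size_write() above.
 */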
5587
5588 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5589 static union trace_eval_map_item *
5590 update_eval_map(union trace_eval_map_item *ptr)
5591 {
5592 if (!ptr->map.eval_string) {
5593 if (ptr->tail.next) {
5594 ptr = ptr->tail.next;
5595 /* Set ptr to the next real item (skip head) */
5596 ptr++;
5597 } else
5598 return NULL;
5599 }
5600 return ptr;
5601 }
5602
5603 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5604 {
5605 union trace_eval_map_item *ptr = v;
5606
5607 /*
5608 * Paranoid! If ptr points to end, we don't want to increment past it.
5609 * This really should never happen.
5610 */
5611 (*pos)++;
5612 ptr = update_eval_map(ptr);
5613 if (WARN_ON_ONCE(!ptr))
5614 return NULL;
5615
5616 ptr++;
5617 ptr = update_eval_map(ptr);
5618
5619 return ptr;
5620 }
5621
5622 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5623 {
5624 union trace_eval_map_item *v;
5625 loff_t l = 0;
5626
5627 mutex_lock(&trace_eval_mutex);
5628
5629 v = trace_eval_maps;
5630 if (v)
5631 v++;
5632
5633 while (v && l < *pos) {
5634 v = eval_map_next(m, v, &l);
5635 }
5636
5637 return v;
5638 }
5639
5640 static void eval_map_stop(struct seq_file *m, void *v)
5641 {
5642 mutex_unlock(&trace_eval_mutex);
5643 }
5644
5645 static int eval_map_show(struct seq_file *m, void *v)
5646 {
5647 union trace_eval_map_item *ptr = v;
5648
5649 seq_printf(m, "%s %ld (%s)\n",
5650 ptr->map.eval_string, ptr->map.eval_value,
5651 ptr->map.system);
5652
5653 return 0;
5654 }
5655
5656 static const struct seq_operations tracing_eval_map_seq_ops = {
5657 .start = eval_map_start,
5658 .next = eval_map_next,
5659 .stop = eval_map_stop,
5660 .show = eval_map_show,
5661 };
5662
5663 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5664 {
5665 int ret;
5666
5667 ret = tracing_check_open_get_tr(NULL);
5668 if (ret)
5669 return ret;
5670
5671 return seq_open(filp, &tracing_eval_map_seq_ops);
5672 }
5673
5674 static const struct file_operations tracing_eval_map_fops = {
5675 .open = tracing_eval_map_open,
5676 .read = seq_read,
5677 .llseek = seq_lseek,
5678 .release = seq_release,
5679 };
5680
5681 static inline union trace_eval_map_item *
5682 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5683 {
5684 /* Return tail of array given the head */
5685 return ptr + ptr->head.length + 1;
5686 }
5687
5688 static void
5689 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5690 int len)
5691 {
5692 struct trace_eval_map **stop;
5693 struct trace_eval_map **map;
5694 union trace_eval_map_item *map_array;
5695 union trace_eval_map_item *ptr;
5696
5697 stop = start + len;
5698
5699 /*
5700 * The trace_eval_maps contains the map plus a head and tail item,
5701 * where the head holds the module and length of array, and the
5702 * tail holds a pointer to the next list.
5703 */
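/*
 * Sketch of the resulting layout (len + 2 items):
 *
 *   [ head | map[0] | map[1] | ... | map[len - 1] | tail ]
 *
 * head.mod and head.length describe this block; the tail item is zeroed
 * by the memset() below, so tail.next stays NULL until another block is
 * chained on.
 */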
5704 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5705 if (!map_array) {
5706 pr_warn("Unable to allocate trace eval mapping\n");
5707 return;
5708 }
5709
5710 mutex_lock(&trace_eval_mutex);
5711
5712 if (!trace_eval_maps)
5713 trace_eval_maps = map_array;
5714 else {
5715 ptr = trace_eval_maps;
5716 for (;;) {
5717 ptr = trace_eval_jmp_to_tail(ptr);
5718 if (!ptr->tail.next)
5719 break;
5720 ptr = ptr->tail.next;
5721
5722 }
5723 ptr->tail.next = map_array;
5724 }
5725 map_array->head.mod = mod;
5726 map_array->head.length = len;
5727 map_array++;
5728
5729 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5730 map_array->map = **map;
5731 map_array++;
5732 }
5733 memset(map_array, 0, sizeof(*map_array));
5734
5735 mutex_unlock(&trace_eval_mutex);
5736 }
5737
5738 static void trace_create_eval_file(struct dentry *d_tracer)
5739 {
5740 trace_create_file("eval_map", 0444, d_tracer,
5741 NULL, &tracing_eval_map_fops);
5742 }
5743
5744 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5745 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5746 static inline void trace_insert_eval_map_file(struct module *mod,
5747 struct trace_eval_map **start, int len) { }
5748 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5749
5750 static void trace_insert_eval_map(struct module *mod,
5751 struct trace_eval_map **start, int len)
5752 {
5753 struct trace_eval_map **map;
5754
5755 if (len <= 0)
5756 return;
5757
5758 map = start;
5759
5760 trace_event_eval_update(map, len);
5761
5762 trace_insert_eval_map_file(mod, start, len);
5763 }
5764
5765 static ssize_t
5766 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5767 size_t cnt, loff_t *ppos)
5768 {
5769 struct trace_array *tr = filp->private_data;
5770 char buf[MAX_TRACER_SIZE+2];
5771 int r;
5772
5773 mutex_lock(&trace_types_lock);
5774 r = sprintf(buf, "%s\n", tr->current_trace->name);
5775 mutex_unlock(&trace_types_lock);
5776
5777 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5778 }
5779
5780 int tracer_init(struct tracer *t, struct trace_array *tr)
5781 {
5782 tracing_reset_online_cpus(&tr->array_buffer);
5783 return t->init(tr);
5784 }
5785
5786 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5787 {
5788 int cpu;
5789
5790 for_each_tracing_cpu(cpu)
5791 per_cpu_ptr(buf->data, cpu)->entries = val;
5792 }
5793
5794 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5795 {
5796 if (cpu == RING_BUFFER_ALL_CPUS) {
5797 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5798 } else {
5799 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5800 }
5801 }
5802
5803 #ifdef CONFIG_TRACER_MAX_TRACE
5804 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5805 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5806 struct array_buffer *size_buf, int cpu_id)
5807 {
5808 int cpu, ret = 0;
5809
5810 if (cpu_id == RING_BUFFER_ALL_CPUS) {
5811 for_each_tracing_cpu(cpu) {
5812 ret = ring_buffer_resize(trace_buf->buffer,
5813 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5814 if (ret < 0)
5815 break;
5816 per_cpu_ptr(trace_buf->data, cpu)->entries =
5817 per_cpu_ptr(size_buf->data, cpu)->entries;
5818 }
5819 } else {
5820 ret = ring_buffer_resize(trace_buf->buffer,
5821 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5822 if (ret == 0)
5823 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5824 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5825 }
5826
5827 return ret;
5828 }
5829 #endif /* CONFIG_TRACER_MAX_TRACE */
5830
5831 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5832 unsigned long size, int cpu)
5833 {
5834 int ret;
5835
5836 /*
5837 * If kernel or user changes the size of the ring buffer
5838 * we use the size that was given, and we can forget about
5839 * expanding it later.
5840 */
5841 ring_buffer_expanded = true;
5842
5843 /* May be called before buffers are initialized */
5844 if (!tr->array_buffer.buffer)
5845 return 0;
5846
5847 /* Do not allow tracing while resizing ring buffer */
5848 tracing_stop_tr(tr);
5849
5850 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5851 if (ret < 0)
5852 goto out_start;
5853
5854 #ifdef CONFIG_TRACER_MAX_TRACE
5855 if (!tr->allocated_snapshot)
5856 goto out;
5857
5858 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5859 if (ret < 0) {
5860 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5861 &tr->array_buffer, cpu);
5862 if (r < 0) {
5863 /*
5864 * AARGH! We are left with different
5865 * size max buffer!!!!
5866 * The max buffer is our "snapshot" buffer.
5867 * When a tracer needs a snapshot (one of the
5868 * latency tracers), it swaps the max buffer
5869 * with the saved snapshot. We succeeded in
5870 * updating the size of the main buffer, but failed to
5871 * update the size of the max buffer. But when we tried
5872 * to reset the main buffer to the original size, we
5873 * failed there too. This is very unlikely to
5874 * happen, but if it does, warn and kill all
5875 * tracing.
5876 */
5877 WARN_ON(1);
5878 tracing_disabled = 1;
5879 }
5880 goto out_start;
5881 }
5882
5883 update_buffer_entries(&tr->max_buffer, cpu);
5884
5885 out:
5886 #endif /* CONFIG_TRACER_MAX_TRACE */
5887
5888 update_buffer_entries(&tr->array_buffer, cpu);
5889 out_start:
5890 tracing_start_tr(tr);
5891 return ret;
5892 }
5893
5894 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5895 unsigned long size, int cpu_id)
5896 {
5897 int ret = size;
5898
5899 mutex_lock(&trace_types_lock);
5900
5901 if (cpu_id != RING_BUFFER_ALL_CPUS) {
5902 /* make sure this CPU is enabled in the mask */
5903 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5904 ret = -EINVAL;
5905 goto out;
5906 }
5907 }
5908
5909 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5910 if (ret < 0)
5911 ret = -ENOMEM;
5912
5913 out:
5914 mutex_unlock(&trace_types_lock);
5915
5916 return ret;
5917 }
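
/*
 * Example call (a sketch mirroring what the buffer size write handler
 * below does when user space writes a new size in KB):
 *
 *	// size is in bytes per CPU
 *	ret = tracing_resize_ring_buffer(tr, 4096 << 10, RING_BUFFER_ALL_CPUS);
 *	if (ret < 0)
 *		return ret;	// -EINVAL for a CPU not in tracing_buffer_mask,
 *				// -ENOMEM if the resize itself failed
 *
 * Passing a CPU number instead of RING_BUFFER_ALL_CPUS resizes only that
 * CPU's buffer.
 */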
5918
5919
5920 /**
5921 * tracing_update_buffers - used by tracing facility to expand ring buffers
5922 *
5923 * To save memory when tracing is never used on a system that has it
5924 * configured in, the ring buffers are set to a minimum size. Once a
5925 * user starts to use the tracing facility, they need to grow to
5926 * their default size.
5927 *
5928 * This function is to be called when a tracer is about to be used.
5929 */
5930 int tracing_update_buffers(void)
5931 {
5932 int ret = 0;
5933
5934 mutex_lock(&trace_types_lock);
5935 if (!ring_buffer_expanded)
5936 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5937 RING_BUFFER_ALL_CPUS);
5938 mutex_unlock(&trace_types_lock);
5939
5940 return ret;
5941 }
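
/*
 * Typical caller pattern (sketch): code about to enable tracing calls
 * this first so the minimal boot-time buffers are grown before use.
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *	// ring buffers are now at their full size; safe to enable events
 */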
5942
5943 struct trace_option_dentry;
5944
5945 static void
5946 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5947
5948 /*
5949 * Used to clear out the tracer before deletion of an instance.
5950 * Must have trace_types_lock held.
5951 */
5952 static void tracing_set_nop(struct trace_array *tr)
5953 {
5954 if (tr->current_trace == &nop_trace)
5955 return;
5956
5957 tr->current_trace->enabled--;
5958
5959 if (tr->current_trace->reset)
5960 tr->current_trace->reset(tr);
5961
5962 tr->current_trace = &nop_trace;
5963 }
5964
5965 static bool tracer_options_updated;
5966
5967 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5968 {
5969 /* Only enable if the directory has been created already. */
5970 if (!tr->dir)
5971 return;
5972
5973 /* Only create trace option files after update_tracer_options finish */
5974 if (!tracer_options_updated)
5975 return;
5976
5977 create_trace_option_files(tr, t);
5978 }
5979
5980 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5981 {
5982 struct tracer *t;
5983 #ifdef CONFIG_TRACER_MAX_TRACE
5984 bool had_max_tr;
5985 #endif
5986 int ret = 0;
5987
5988 mutex_lock(&trace_types_lock);
5989
5990 if (!ring_buffer_expanded) {
5991 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5992 RING_BUFFER_ALL_CPUS);
5993 if (ret < 0)
5994 goto out;
5995 ret = 0;
5996 }
5997
5998 for (t = trace_types; t; t = t->next) {
5999 if (strcmp(t->name, buf) == 0)
6000 break;
6001 }
6002 if (!t) {
6003 ret = -EINVAL;
6004 goto out;
6005 }
6006 if (t == tr->current_trace)
6007 goto out;
6008
6009 #ifdef CONFIG_TRACER_SNAPSHOT
6010 if (t->use_max_tr) {
6011 local_irq_disable();
6012 arch_spin_lock(&tr->max_lock);
6013 if (tr->cond_snapshot)
6014 ret = -EBUSY;
6015 arch_spin_unlock(&tr->max_lock);
6016 local_irq_enable();
6017 if (ret)
6018 goto out;
6019 }
6020 #endif
6021 /* Some tracers won't work on kernel command line */
6022 if (system_state < SYSTEM_RUNNING && t->noboot) {
6023 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6024 t->name);
6025 goto out;
6026 }
6027
6028 /* Some tracers are only allowed for the top level buffer */
6029 if (!trace_ok_for_array(t, tr)) {
6030 ret = -EINVAL;
6031 goto out;
6032 }
6033
6034 /* If trace pipe files are being read, we can't change the tracer */
6035 if (tr->trace_ref) {
6036 ret = -EBUSY;
6037 goto out;
6038 }
6039
6040 trace_branch_disable();
6041
6042 tr->current_trace->enabled--;
6043
6044 if (tr->current_trace->reset)
6045 tr->current_trace->reset(tr);
6046
6047 #ifdef CONFIG_TRACER_MAX_TRACE
6048 had_max_tr = tr->current_trace->use_max_tr;
6049
6050 /* Current trace needs to be nop_trace before synchronize_rcu */
6051 tr->current_trace = &nop_trace;
6052
6053 if (had_max_tr && !t->use_max_tr) {
6054 /*
6055 * We need to make sure that the update_max_tr sees that
6056 * current_trace changed to nop_trace to keep it from
6057 * swapping the buffers after we resize it.
6058 * The update_max_tr is called with interrupts disabled,
6059 * so a synchronize_rcu() is sufficient.
6060 */
6061 synchronize_rcu();
6062 free_snapshot(tr);
6063 }
6064
6065 if (t->use_max_tr && !tr->allocated_snapshot) {
6066 ret = tracing_alloc_snapshot_instance(tr);
6067 if (ret < 0)
6068 goto out;
6069 }
6070 #else
6071 tr->current_trace = &nop_trace;
6072 #endif
6073
6074 if (t->init) {
6075 ret = tracer_init(t, tr);
6076 if (ret)
6077 goto out;
6078 }
6079
6080 tr->current_trace = t;
6081 tr->current_trace->enabled++;
6082 trace_branch_enable(tr);
6083 out:
6084 mutex_unlock(&trace_types_lock);
6085
6086 return ret;
6087 }
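
/*
 * Example (sketch): switching the top-level instance back to the nop
 * tracer, which is always present. Other tracer names depend on what is
 * configured in (e.g. "function" with CONFIG_FUNCTION_TRACER).
 *
 *	ret = tracing_set_tracer(&global_trace, "nop");
 *	if (ret == -EBUSY)
 *		;	// a trace_pipe reader or cond_snapshot is active
 *
 * -EINVAL means no registered tracer matched the name.
 */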
6088
6089 static ssize_t
6090 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6091 size_t cnt, loff_t *ppos)
6092 {
6093 struct trace_array *tr = filp->private_data;
6094 char buf[MAX_TRACER_SIZE+1];
6095 int i;
6096 size_t ret;
6097 int err;
6098
6099 ret = cnt;
6100
6101 if (cnt > MAX_TRACER_SIZE)
6102 cnt = MAX_TRACER_SIZE;
6103
6104 if (copy_from_user(buf, ubuf, cnt))
6105 return -EFAULT;
6106
6107 buf[cnt] = 0;
6108
6109 /* strip ending whitespace. */
6110 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6111 buf[i] = 0;
6112
6113 err = tracing_set_tracer(tr, buf);
6114 if (err)
6115 return err;
6116
6117 *ppos += ret;
6118
6119 return ret;
6120 }
6121
6122 static ssize_t
6123 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6124 size_t cnt, loff_t *ppos)
6125 {
6126 char buf[64];
6127 int r;
6128
6129 r = snprintf(buf, sizeof(buf), "%ld\n",
6130 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6131 if (r > sizeof(buf))
6132 r = sizeof(buf);
6133 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6134 }
6135
6136 static ssize_t
6137 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6138 size_t cnt, loff_t *ppos)
6139 {
6140 unsigned long val;
6141 int ret;
6142
6143 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6144 if (ret)
6145 return ret;
6146
6147 *ptr = val * 1000;
6148
6149 return cnt;
6150 }
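
/*
 * Unit note for the two helpers above: the stored variables hold
 * nanoseconds, but user space reads and writes microseconds. For the
 * tracing_thresh file (handlers below) that means:
 *
 *	echo 100 > tracing_thresh	// stores 100 * 1000 = 100000 ns
 *	cat tracing_thresh		// prints 100, via nsecs_to_usecs()
 */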
6151
6152 static ssize_t
6153 tracing_thresh_read(struct file *filp, char __user *ubuf,
6154 size_t cnt, loff_t *ppos)
6155 {
6156 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6157 }
6158
6159 static ssize_t
6160 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6161 size_t cnt, loff_t *ppos)
6162 {
6163 struct trace_array *tr = filp->private_data;
6164 int ret;
6165
6166 mutex_lock(&trace_types_lock);
6167 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6168 if (ret < 0)
6169 goto out;
6170
6171 if (tr->current_trace->update_thresh) {
6172 ret = tr->current_trace->update_thresh(tr);
6173 if (ret < 0)
6174 goto out;
6175 }
6176
6177 ret = cnt;
6178 out:
6179 mutex_unlock(&trace_types_lock);
6180
6181 return ret;
6182 }
6183
6184 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6185
6186 static ssize_t
6187 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6188 size_t cnt, loff_t *ppos)
6189 {
6190 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6191 }
6192
6193 static ssize_t
6194 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6195 size_t cnt, loff_t *ppos)
6196 {
6197 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6198 }
6199
6200 #endif
6201
6202 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6203 {
6204 struct trace_array *tr = inode->i_private;
6205 struct trace_iterator *iter;
6206 int ret;
6207
6208 ret = tracing_check_open_get_tr(tr);
6209 if (ret)
6210 return ret;
6211
6212 mutex_lock(&trace_types_lock);
6213
6214 /* create a buffer to store the information to pass to userspace */
6215 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6216 if (!iter) {
6217 ret = -ENOMEM;
6218 __trace_array_put(tr);
6219 goto out;
6220 }
6221
6222 trace_seq_init(&iter->seq);
6223 iter->trace = tr->current_trace;
6224
6225 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6226 ret = -ENOMEM;
6227 goto fail;
6228 }
6229
6230 /* trace pipe does not show start of buffer */
6231 cpumask_setall(iter->started);
6232
6233 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6234 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6235
6236 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6237 if (trace_clocks[tr->clock_id].in_ns)
6238 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6239
6240 iter->tr = tr;
6241 iter->array_buffer = &tr->array_buffer;
6242 iter->cpu_file = tracing_get_cpu(inode);
6243 mutex_init(&iter->mutex);
6244 filp->private_data = iter;
6245
6246 if (iter->trace->pipe_open)
6247 iter->trace->pipe_open(iter);
6248
6249 nonseekable_open(inode, filp);
6250
6251 tr->trace_ref++;
6252 out:
6253 mutex_unlock(&trace_types_lock);
6254 return ret;
6255
6256 fail:
6257 kfree(iter);
6258 __trace_array_put(tr);
6259 mutex_unlock(&trace_types_lock);
6260 return ret;
6261 }
6262
6263 static int tracing_release_pipe(struct inode *inode, struct file *file)
6264 {
6265 struct trace_iterator *iter = file->private_data;
6266 struct trace_array *tr = inode->i_private;
6267
6268 mutex_lock(&trace_types_lock);
6269
6270 tr->trace_ref--;
6271
6272 if (iter->trace->pipe_close)
6273 iter->trace->pipe_close(iter);
6274
6275 mutex_unlock(&trace_types_lock);
6276
6277 free_cpumask_var(iter->started);
6278 kfree(iter->temp);
6279 mutex_destroy(&iter->mutex);
6280 kfree(iter);
6281
6282 trace_array_put(tr);
6283
6284 return 0;
6285 }
6286
6287 static __poll_t
6288 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6289 {
6290 struct trace_array *tr = iter->tr;
6291
6292 /* Iterators are static, they should be filled or empty */
6293 if (trace_buffer_iter(iter, iter->cpu_file))
6294 return EPOLLIN | EPOLLRDNORM;
6295
6296 if (tr->trace_flags & TRACE_ITER_BLOCK)
6297 /*
6298 * Always select as readable when in blocking mode
6299 */
6300 return EPOLLIN | EPOLLRDNORM;
6301 else
6302 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6303 filp, poll_table, iter->tr->buffer_percent);
6304 }
6305
6306 static __poll_t
6307 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6308 {
6309 struct trace_iterator *iter = filp->private_data;
6310
6311 return trace_poll(iter, filp, poll_table);
6312 }
6313
6314 /* Must be called with iter->mutex held. */
6315 static int tracing_wait_pipe(struct file *filp)
6316 {
6317 struct trace_iterator *iter = filp->private_data;
6318 int ret;
6319
6320 while (trace_empty(iter)) {
6321
6322 if ((filp->f_flags & O_NONBLOCK)) {
6323 return -EAGAIN;
6324 }
6325
6326 /*
6327 * We block until we read something and tracing is disabled.
6328 * We still block if tracing is disabled, but we have never
6329 * read anything. This allows a user to cat this file, and
6330 * then enable tracing. But after we have read something,
6331 * we give an EOF when tracing is again disabled.
6332 *
6333 * iter->pos will be 0 if we haven't read anything.
6334 */
6335 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6336 break;
6337
6338 mutex_unlock(&iter->mutex);
6339
6340 ret = wait_on_pipe(iter, 0);
6341
6342 mutex_lock(&iter->mutex);
6343
6344 if (ret)
6345 return ret;
6346 }
6347
6348 return 1;
6349 }
6350
6351 /*
6352 * Consumer reader.
6353 */
6354 static ssize_t
6355 tracing_read_pipe(struct file *filp, char __user *ubuf,
6356 size_t cnt, loff_t *ppos)
6357 {
6358 struct trace_iterator *iter = filp->private_data;
6359 ssize_t sret;
6360
6361 /*
6362 * Avoid more than one consumer on a single file descriptor
6363 * This is just a matter of trace coherency; the ring buffer itself
6364 * is protected.
6365 */
6366 mutex_lock(&iter->mutex);
6367
6368 /* return any leftover data */
6369 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6370 if (sret != -EBUSY)
6371 goto out;
6372
6373 trace_seq_init(&iter->seq);
6374
6375 if (iter->trace->read) {
6376 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6377 if (sret)
6378 goto out;
6379 }
6380
6381 waitagain:
6382 sret = tracing_wait_pipe(filp);
6383 if (sret <= 0)
6384 goto out;
6385
6386 /* stop when tracing is finished */
6387 if (trace_empty(iter)) {
6388 sret = 0;
6389 goto out;
6390 }
6391
6392 if (cnt >= PAGE_SIZE)
6393 cnt = PAGE_SIZE - 1;
6394
6395 /* reset all but tr, trace, and overruns */
6396 memset(&iter->seq, 0,
6397 sizeof(struct trace_iterator) -
6398 offsetof(struct trace_iterator, seq));
6399 cpumask_clear(iter->started);
6400 trace_seq_init(&iter->seq);
6401 iter->pos = -1;
6402
6403 trace_event_read_lock();
6404 trace_access_lock(iter->cpu_file);
6405 while (trace_find_next_entry_inc(iter) != NULL) {
6406 enum print_line_t ret;
6407 int save_len = iter->seq.seq.len;
6408
6409 ret = print_trace_line(iter);
6410 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6411 /*
6412 * If one print_trace_line() fills the entire trace_seq in one shot,
6413 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6414 * In this case, we need to consume it, otherwise the loop will peek
6415 * at this event next time, resulting in an infinite loop.
6416 */
6417 if (save_len == 0) {
6418 iter->seq.full = 0;
6419 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6420 trace_consume(iter);
6421 break;
6422 }
6423
6424 /* In other cases, don't print partial lines */
6425 iter->seq.seq.len = save_len;
6426 break;
6427 }
6428 if (ret != TRACE_TYPE_NO_CONSUME)
6429 trace_consume(iter);
6430
6431 if (trace_seq_used(&iter->seq) >= cnt)
6432 break;
6433
6434 /*
6435 * Setting the full flag means we reached the trace_seq buffer
6436 * size and we should have left via the partial output condition above.
6437 * One of the trace_seq_* functions is not used properly.
6438 */
6439 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6440 iter->ent->type);
6441 }
6442 trace_access_unlock(iter->cpu_file);
6443 trace_event_read_unlock();
6444
6445 /* Now copy what we have to the user */
6446 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6447 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6448 trace_seq_init(&iter->seq);
6449
6450 /*
6451 * If there was nothing to send to user, in spite of consuming trace
6452 * entries, go back to wait for more entries.
6453 */
6454 if (sret == -EBUSY)
6455 goto waitagain;
6456
6457 out:
6458 mutex_unlock(&iter->mutex);
6459
6460 return sret;
6461 }
6462
6463 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6464 unsigned int idx)
6465 {
6466 __free_page(spd->pages[idx]);
6467 }
6468
6469 static size_t
6470 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6471 {
6472 size_t count;
6473 int save_len;
6474 int ret;
6475
6476 /* Seq buffer is page-sized, exactly what we need. */
6477 for (;;) {
6478 save_len = iter->seq.seq.len;
6479 ret = print_trace_line(iter);
6480
6481 if (trace_seq_has_overflowed(&iter->seq)) {
6482 iter->seq.seq.len = save_len;
6483 break;
6484 }
6485
6486 /*
6487 * This should not be hit, because it should only
6488 * be set if the iter->seq overflowed. But check it
6489 * anyway to be safe.
6490 */
6491 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6492 iter->seq.seq.len = save_len;
6493 break;
6494 }
6495
6496 count = trace_seq_used(&iter->seq) - save_len;
6497 if (rem < count) {
6498 rem = 0;
6499 iter->seq.seq.len = save_len;
6500 break;
6501 }
6502
6503 if (ret != TRACE_TYPE_NO_CONSUME)
6504 trace_consume(iter);
6505 rem -= count;
6506 if (!trace_find_next_entry_inc(iter)) {
6507 rem = 0;
6508 iter->ent = NULL;
6509 break;
6510 }
6511 }
6512
6513 return rem;
6514 }
6515
6516 static ssize_t tracing_splice_read_pipe(struct file *filp,
6517 loff_t *ppos,
6518 struct pipe_inode_info *pipe,
6519 size_t len,
6520 unsigned int flags)
6521 {
6522 struct page *pages_def[PIPE_DEF_BUFFERS];
6523 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6524 struct trace_iterator *iter = filp->private_data;
6525 struct splice_pipe_desc spd = {
6526 .pages = pages_def,
6527 .partial = partial_def,
6528 .nr_pages = 0, /* This gets updated below. */
6529 .nr_pages_max = PIPE_DEF_BUFFERS,
6530 .ops = &default_pipe_buf_ops,
6531 .spd_release = tracing_spd_release_pipe,
6532 };
6533 ssize_t ret;
6534 size_t rem;
6535 unsigned int i;
6536
6537 if (splice_grow_spd(pipe, &spd))
6538 return -ENOMEM;
6539
6540 mutex_lock(&iter->mutex);
6541
6542 if (iter->trace->splice_read) {
6543 ret = iter->trace->splice_read(iter, filp,
6544 ppos, pipe, len, flags);
6545 if (ret)
6546 goto out_err;
6547 }
6548
6549 ret = tracing_wait_pipe(filp);
6550 if (ret <= 0)
6551 goto out_err;
6552
6553 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6554 ret = -EFAULT;
6555 goto out_err;
6556 }
6557
6558 trace_event_read_lock();
6559 trace_access_lock(iter->cpu_file);
6560
6561 /* Fill as many pages as possible. */
6562 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6563 spd.pages[i] = alloc_page(GFP_KERNEL);
6564 if (!spd.pages[i])
6565 break;
6566
6567 rem = tracing_fill_pipe_page(rem, iter);
6568
6569 /* Copy the data into the page, so we can start over. */
6570 ret = trace_seq_to_buffer(&iter->seq,
6571 page_address(spd.pages[i]),
6572 trace_seq_used(&iter->seq));
6573 if (ret < 0) {
6574 __free_page(spd.pages[i]);
6575 break;
6576 }
6577 spd.partial[i].offset = 0;
6578 spd.partial[i].len = trace_seq_used(&iter->seq);
6579
6580 trace_seq_init(&iter->seq);
6581 }
6582
6583 trace_access_unlock(iter->cpu_file);
6584 trace_event_read_unlock();
6585 mutex_unlock(&iter->mutex);
6586
6587 spd.nr_pages = i;
6588
6589 if (i)
6590 ret = splice_to_pipe(pipe, &spd);
6591 else
6592 ret = 0;
6593 out:
6594 splice_shrink_spd(&spd);
6595 return ret;
6596
6597 out_err:
6598 mutex_unlock(&iter->mutex);
6599 goto out;
6600 }
6601
6602 static ssize_t
6603 tracing_entries_read(struct file *filp, char __user *ubuf,
6604 size_t cnt, loff_t *ppos)
6605 {
6606 struct inode *inode = file_inode(filp);
6607 struct trace_array *tr = inode->i_private;
6608 int cpu = tracing_get_cpu(inode);
6609 char buf[64];
6610 int r = 0;
6611 ssize_t ret;
6612
6613 mutex_lock(&trace_types_lock);
6614
6615 if (cpu == RING_BUFFER_ALL_CPUS) {
6616 int cpu, buf_size_same;
6617 unsigned long size;
6618
6619 size = 0;
6620 buf_size_same = 1;
6621 /* check if all cpu sizes are same */
6622 for_each_tracing_cpu(cpu) {
6623 /* fill in the size from first enabled cpu */
6624 if (size == 0)
6625 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6626 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6627 buf_size_same = 0;
6628 break;
6629 }
6630 }
6631
6632 if (buf_size_same) {
6633 if (!ring_buffer_expanded)
6634 r = sprintf(buf, "%lu (expanded: %lu)\n",
6635 size >> 10,
6636 trace_buf_size >> 10);
6637 else
6638 r = sprintf(buf, "%lu\n", size >> 10);
6639 } else
6640 r = sprintf(buf, "X\n");
6641 } else
6642 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6643
6644 mutex_unlock(&trace_types_lock);
6645
6646 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6647 return ret;
6648 }
6649
6650 static ssize_t
6651 tracing_entries_write(struct file *filp, const char __user *ubuf,
6652 size_t cnt, loff_t *ppos)
6653 {
6654 struct inode *inode = file_inode(filp);
6655 struct trace_array *tr = inode->i_private;
6656 unsigned long val;
6657 int ret;
6658
6659 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6660 if (ret)
6661 return ret;
6662
6663 /* must have at least 1 entry */
6664 if (!val)
6665 return -EINVAL;
6666
6667 /* value is in KB */
6668 val <<= 10;
6669 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6670 if (ret < 0)
6671 return ret;
6672
6673 *ppos += cnt;
6674
6675 return cnt;
6676 }
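
/*
 * User-space view (sketch; these fops conventionally back the
 * buffer_size_kb files):
 *
 *	echo 4096 > buffer_size_kb		// 4 MB per CPU (val <<= 10)
 *	echo 4096 > per_cpu/cpu1/buffer_size_kb	// resize only CPU 1
 *
 * Writing 0 is rejected with -EINVAL; shrinking a buffer to nothing is
 * done through tracing_free_buffer_release() below instead.
 */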
6677
6678 static ssize_t
6679 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6680 size_t cnt, loff_t *ppos)
6681 {
6682 struct trace_array *tr = filp->private_data;
6683 char buf[64];
6684 int r, cpu;
6685 unsigned long size = 0, expanded_size = 0;
6686
6687 mutex_lock(&trace_types_lock);
6688 for_each_tracing_cpu(cpu) {
6689 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6690 if (!ring_buffer_expanded)
6691 expanded_size += trace_buf_size >> 10;
6692 }
6693 if (ring_buffer_expanded)
6694 r = sprintf(buf, "%lu\n", size);
6695 else
6696 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6697 mutex_unlock(&trace_types_lock);
6698
6699 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6700 }
6701
6702 static ssize_t
6703 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6704 size_t cnt, loff_t *ppos)
6705 {
6706 /*
6707 * There is no need to read what the user has written; this function
6708 * only exists so that writing to the file with "echo" does not fail.
6709 */
6710
6711 *ppos += cnt;
6712
6713 return cnt;
6714 }
6715
6716 static int
6717 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6718 {
6719 struct trace_array *tr = inode->i_private;
6720
6721 /* disable tracing ? */
6722 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6723 tracer_tracing_off(tr);
6724 /* resize the ring buffer to 0 */
6725 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6726
6727 trace_array_put(tr);
6728
6729 return 0;
6730 }
6731
6732 static ssize_t
6733 tracing_mark_write(struct file *filp, const char __user *ubuf,
6734 size_t cnt, loff_t *fpos)
6735 {
6736 struct trace_array *tr = filp->private_data;
6737 struct ring_buffer_event *event;
6738 enum event_trigger_type tt = ETT_NONE;
6739 struct trace_buffer *buffer;
6740 struct print_entry *entry;
6741 unsigned long irq_flags;
6742 ssize_t written;
6743 int size;
6744 int len;
6745
6746 /* Used in tracing_mark_raw_write() as well */
6747 #define FAULTED_STR "<faulted>"
6748 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6749
6750 if (tracing_disabled)
6751 return -EINVAL;
6752
6753 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6754 return -EINVAL;
6755
6756 if (cnt > TRACE_BUF_SIZE)
6757 cnt = TRACE_BUF_SIZE;
6758
6759 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6760
6761 local_save_flags(irq_flags);
6762 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6763
6764 /* If less than "<faulted>", then make sure we can still add that */
6765 if (cnt < FAULTED_SIZE)
6766 size += FAULTED_SIZE - cnt;
6767
6768 buffer = tr->array_buffer.buffer;
6769 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6770 irq_flags, preempt_count());
6771 if (unlikely(!event))
6772 /* Ring buffer disabled, return as if not open for write */
6773 return -EBADF;
6774
6775 entry = ring_buffer_event_data(event);
6776 entry->ip = _THIS_IP_;
6777
6778 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6779 if (len) {
6780 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6781 cnt = FAULTED_SIZE;
6782 written = -EFAULT;
6783 } else
6784 written = cnt;
6785
6786 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6787 /* do not add \n before testing triggers, but add \0 */
6788 entry->buf[cnt] = '\0';
6789 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6790 }
6791
6792 if (entry->buf[cnt - 1] != '\n') {
6793 entry->buf[cnt] = '\n';
6794 entry->buf[cnt + 1] = '\0';
6795 } else
6796 entry->buf[cnt] = '\0';
6797
6798 if (static_branch_unlikely(&trace_marker_exports_enabled))
6799 ftrace_exports(event, TRACE_EXPORT_MARKER);
6800 __buffer_unlock_commit(buffer, event);
6801
6802 if (tt)
6803 event_triggers_post_call(tr->trace_marker_file, tt);
6804
6805 if (written > 0)
6806 *fpos += written;
6807
6808 return written;
6809 }
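
/*
 * Usage sketch (assuming the usual trace_marker tracefs file backed by
 * tracing_mark_fops below):
 *
 *	echo "hit the slow path" > trace_marker
 *
 * The string is logged as a TRACE_PRINT event; a trailing newline is
 * appended when the writer did not supply one, and a write that faults
 * mid-copy is recorded as "<faulted>".
 */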
6810
6811 /* Limit it for now to 3K (including tag) */
6812 #define RAW_DATA_MAX_SIZE (1024*3)
6813
6814 static ssize_t
6815 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6816 size_t cnt, loff_t *fpos)
6817 {
6818 struct trace_array *tr = filp->private_data;
6819 struct ring_buffer_event *event;
6820 struct trace_buffer *buffer;
6821 struct raw_data_entry *entry;
6822 unsigned long irq_flags;
6823 ssize_t written;
6824 int size;
6825 int len;
6826
6827 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6828
6829 if (tracing_disabled)
6830 return -EINVAL;
6831
6832 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6833 return -EINVAL;
6834
6835 /* The marker must at least have a tag id */
6836 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6837 return -EINVAL;
6838
6839 if (cnt > TRACE_BUF_SIZE)
6840 cnt = TRACE_BUF_SIZE;
6841
6842 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6843
6844 local_save_flags(irq_flags);
6845 size = sizeof(*entry) + cnt;
6846 if (cnt < FAULT_SIZE_ID)
6847 size += FAULT_SIZE_ID - cnt;
6848
6849 buffer = tr->array_buffer.buffer;
6850 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6851 irq_flags, preempt_count());
6852 if (!event)
6853 /* Ring buffer disabled, return as if not open for write */
6854 return -EBADF;
6855
6856 entry = ring_buffer_event_data(event);
6857
6858 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6859 if (len) {
6860 entry->id = -1;
6861 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6862 written = -EFAULT;
6863 } else
6864 written = cnt;
6865
6866 __buffer_unlock_commit(buffer, event);
6867
6868 if (written > 0)
6869 *fpos += written;
6870
6871 return written;
6872 }
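
/*
 * Payload layout expected by the raw marker (illustrative sketch of the
 * user-space side; the struct, id value and file name are made up):
 *
 *	struct {
 *		unsigned int id;	// lands in entry->id above
 *		char data[8];		// opaque payload, up to
 *	} rec = {			// RAW_DATA_MAX_SIZE - 4 bytes
 *		.id = 42,
 *		.data = "rawdata",
 *	};
 *	write(fd, &rec, sizeof(rec));	// fd open on the raw marker file
 */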
6873
6874 static int tracing_clock_show(struct seq_file *m, void *v)
6875 {
6876 struct trace_array *tr = m->private;
6877 int i;
6878
6879 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6880 seq_printf(m,
6881 "%s%s%s%s", i ? " " : "",
6882 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6883 i == tr->clock_id ? "]" : "");
6884 seq_putc(m, '\n');
6885
6886 return 0;
6887 }
6888
6889 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6890 {
6891 int i;
6892
6893 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6894 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6895 break;
6896 }
6897 if (i == ARRAY_SIZE(trace_clocks))
6898 return -EINVAL;
6899
6900 mutex_lock(&trace_types_lock);
6901
6902 tr->clock_id = i;
6903
6904 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6905
6906 /*
6907 * New clock may not be consistent with the previous clock.
6908 * Reset the buffer so that it doesn't have incomparable timestamps.
6909 */
6910 tracing_reset_online_cpus(&tr->array_buffer);
6911
6912 #ifdef CONFIG_TRACER_MAX_TRACE
6913 if (tr->max_buffer.buffer)
6914 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6915 tracing_reset_online_cpus(&tr->max_buffer);
6916 #endif
6917
6918 mutex_unlock(&trace_types_lock);
6919
6920 return 0;
6921 }
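
/*
 * Example (sketch): switching an instance to a clock whose timestamps
 * are comparable across CPUs. The valid names come from trace_clocks[];
 * "local" and "global" are the commonly available ones.
 *
 *	ret = tracing_set_clock(tr, "global");
 *	if (ret == -EINVAL)
 *		;	// no clock by that name in trace_clocks[]
 *
 * Note that both the main and (if allocated) max buffers are reset, so
 * previously recorded events are lost on a clock change.
 */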
6922
6923 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6924 size_t cnt, loff_t *fpos)
6925 {
6926 struct seq_file *m = filp->private_data;
6927 struct trace_array *tr = m->private;
6928 char buf[64];
6929 const char *clockstr;
6930 int ret;
6931
6932 if (cnt >= sizeof(buf))
6933 return -EINVAL;
6934
6935 if (copy_from_user(buf, ubuf, cnt))
6936 return -EFAULT;
6937
6938 buf[cnt] = 0;
6939
6940 clockstr = strstrip(buf);
6941
6942 ret = tracing_set_clock(tr, clockstr);
6943 if (ret)
6944 return ret;
6945
6946 *fpos += cnt;
6947
6948 return cnt;
6949 }
6950
6951 static int tracing_clock_open(struct inode *inode, struct file *file)
6952 {
6953 struct trace_array *tr = inode->i_private;
6954 int ret;
6955
6956 ret = tracing_check_open_get_tr(tr);
6957 if (ret)
6958 return ret;
6959
6960 ret = single_open(file, tracing_clock_show, inode->i_private);
6961 if (ret < 0)
6962 trace_array_put(tr);
6963
6964 return ret;
6965 }
6966
6967 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6968 {
6969 struct trace_array *tr = m->private;
6970
6971 mutex_lock(&trace_types_lock);
6972
6973 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6974 seq_puts(m, "delta [absolute]\n");
6975 else
6976 seq_puts(m, "[delta] absolute\n");
6977
6978 mutex_unlock(&trace_types_lock);
6979
6980 return 0;
6981 }
6982
6983 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6984 {
6985 struct trace_array *tr = inode->i_private;
6986 int ret;
6987
6988 ret = tracing_check_open_get_tr(tr);
6989 if (ret)
6990 return ret;
6991
6992 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6993 if (ret < 0)
6994 trace_array_put(tr);
6995
6996 return ret;
6997 }
6998
6999 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
7000 {
7001 int ret = 0;
7002
7003 mutex_lock(&trace_types_lock);
7004
7005 if (abs && tr->time_stamp_abs_ref++)
7006 goto out;
7007
7008 if (!abs) {
7009 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
7010 ret = -EINVAL;
7011 goto out;
7012 }
7013
7014 if (--tr->time_stamp_abs_ref)
7015 goto out;
7016 }
7017
7018 ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
7019
7020 #ifdef CONFIG_TRACER_MAX_TRACE
7021 if (tr->max_buffer.buffer)
7022 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
7023 #endif
7024 out:
7025 mutex_unlock(&trace_types_lock);
7026
7027 return ret;
7028 }
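
/*
 * The absolute-timestamp setting is reference counted, so callers must
 * pair their calls (sketch):
 *
 *	ret = tracing_set_time_stamp_abs(tr, true);	// take a reference
 *	if (!ret) {
 *		...
 *		tracing_set_time_stamp_abs(tr, false);	// drop it again
 *	}
 *
 * Only when the last reference is dropped does the ring buffer switch
 * back to delta timestamps.
 */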
7029
7030 struct ftrace_buffer_info {
7031 struct trace_iterator iter;
7032 void *spare;
7033 unsigned int spare_cpu;
7034 unsigned int read;
7035 };
7036
7037 #ifdef CONFIG_TRACER_SNAPSHOT
7038 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7039 {
7040 struct trace_array *tr = inode->i_private;
7041 struct trace_iterator *iter;
7042 struct seq_file *m;
7043 int ret;
7044
7045 ret = tracing_check_open_get_tr(tr);
7046 if (ret)
7047 return ret;
7048
7049 if (file->f_mode & FMODE_READ) {
7050 iter = __tracing_open(inode, file, true);
7051 if (IS_ERR(iter))
7052 ret = PTR_ERR(iter);
7053 } else {
7054 /* Writes still need the seq_file to hold the private data */
7055 ret = -ENOMEM;
7056 m = kzalloc(sizeof(*m), GFP_KERNEL);
7057 if (!m)
7058 goto out;
7059 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7060 if (!iter) {
7061 kfree(m);
7062 goto out;
7063 }
7064 ret = 0;
7065
7066 iter->tr = tr;
7067 iter->array_buffer = &tr->max_buffer;
7068 iter->cpu_file = tracing_get_cpu(inode);
7069 m->private = iter;
7070 file->private_data = m;
7071 }
7072 out:
7073 if (ret < 0)
7074 trace_array_put(tr);
7075
7076 return ret;
7077 }
7078
7079 static void tracing_swap_cpu_buffer(void *tr)
7080 {
7081 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7082 }
7083
7084 static ssize_t
7085 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7086 loff_t *ppos)
7087 {
7088 struct seq_file *m = filp->private_data;
7089 struct trace_iterator *iter = m->private;
7090 struct trace_array *tr = iter->tr;
7091 unsigned long val;
7092 int ret;
7093
7094 ret = tracing_update_buffers();
7095 if (ret < 0)
7096 return ret;
7097
7098 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7099 if (ret)
7100 return ret;
7101
7102 mutex_lock(&trace_types_lock);
7103
7104 if (tr->current_trace->use_max_tr) {
7105 ret = -EBUSY;
7106 goto out;
7107 }
7108
7109 local_irq_disable();
7110 arch_spin_lock(&tr->max_lock);
7111 if (tr->cond_snapshot)
7112 ret = -EBUSY;
7113 arch_spin_unlock(&tr->max_lock);
7114 local_irq_enable();
7115 if (ret)
7116 goto out;
7117
7118 switch (val) {
7119 case 0:
7120 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7121 ret = -EINVAL;
7122 break;
7123 }
7124 if (tr->allocated_snapshot)
7125 free_snapshot(tr);
7126 break;
7127 case 1:
7128 /* Only allow per-cpu swap if the ring buffer supports it */
7129 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7130 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7131 ret = -EINVAL;
7132 break;
7133 }
7134 #endif
7135 if (tr->allocated_snapshot)
7136 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7137 &tr->array_buffer, iter->cpu_file);
7138 else
7139 ret = tracing_alloc_snapshot_instance(tr);
7140 if (ret < 0)
7141 break;
7142 /* Now, we're going to swap */
7143 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7144 local_irq_disable();
7145 update_max_tr(tr, current, smp_processor_id(), NULL);
7146 local_irq_enable();
7147 } else {
7148 smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7149 (void *)tr, 1);
7150 }
7151 break;
7152 default:
7153 if (tr->allocated_snapshot) {
7154 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7155 tracing_reset_online_cpus(&tr->max_buffer);
7156 else
7157 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7158 }
7159 break;
7160 }
7161
7162 if (ret >= 0) {
7163 *ppos += cnt;
7164 ret = cnt;
7165 }
7166 out:
7167 mutex_unlock(&trace_types_lock);
7168 return ret;
7169 }
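
/*
 * The values user space can write map onto the switch statement above
 * (sketch, assuming the usual "snapshot" tracefs file):
 *
 *	echo 0 > snapshot	// free the snapshot buffer
 *	echo 1 > snapshot	// allocate it if needed and take a snapshot
 *	echo 2 > snapshot	// clear the snapshot buffer, keep it allocated
 *
 * Any value greater than 1 behaves like 2.
 */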
7170
7171 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7172 {
7173 struct seq_file *m = file->private_data;
7174 int ret;
7175
7176 ret = tracing_release(inode, file);
7177
7178 if (file->f_mode & FMODE_READ)
7179 return ret;
7180
7181 /* If write only, the seq_file is just a stub */
7182 if (m)
7183 kfree(m->private);
7184 kfree(m);
7185
7186 return 0;
7187 }
7188
7189 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7190 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7191 size_t count, loff_t *ppos);
7192 static int tracing_buffers_release(struct inode *inode, struct file *file);
7193 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7194 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7195
7196 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7197 {
7198 struct ftrace_buffer_info *info;
7199 int ret;
7200
7201 /* The following checks for tracefs lockdown */
7202 ret = tracing_buffers_open(inode, filp);
7203 if (ret < 0)
7204 return ret;
7205
7206 info = filp->private_data;
7207
7208 if (info->iter.trace->use_max_tr) {
7209 tracing_buffers_release(inode, filp);
7210 return -EBUSY;
7211 }
7212
7213 info->iter.snapshot = true;
7214 info->iter.array_buffer = &info->iter.tr->max_buffer;
7215
7216 return ret;
7217 }
7218
7219 #endif /* CONFIG_TRACER_SNAPSHOT */
7220
7221
7222 static const struct file_operations tracing_thresh_fops = {
7223 .open = tracing_open_generic,
7224 .read = tracing_thresh_read,
7225 .write = tracing_thresh_write,
7226 .llseek = generic_file_llseek,
7227 };
7228
7229 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7230 static const struct file_operations tracing_max_lat_fops = {
7231 .open = tracing_open_generic,
7232 .read = tracing_max_lat_read,
7233 .write = tracing_max_lat_write,
7234 .llseek = generic_file_llseek,
7235 };
7236 #endif
7237
7238 static const struct file_operations set_tracer_fops = {
7239 .open = tracing_open_generic_tr,
7240 .read = tracing_set_trace_read,
7241 .write = tracing_set_trace_write,
7242 .llseek = generic_file_llseek,
7243 .release = tracing_release_generic_tr,
7244 };
7245
7246 static const struct file_operations tracing_pipe_fops = {
7247 .open = tracing_open_pipe,
7248 .poll = tracing_poll_pipe,
7249 .read = tracing_read_pipe,
7250 .splice_read = tracing_splice_read_pipe,
7251 .release = tracing_release_pipe,
7252 .llseek = no_llseek,
7253 };
7254
7255 static const struct file_operations tracing_entries_fops = {
7256 .open = tracing_open_generic_tr,
7257 .read = tracing_entries_read,
7258 .write = tracing_entries_write,
7259 .llseek = generic_file_llseek,
7260 .release = tracing_release_generic_tr,
7261 };
7262
7263 static const struct file_operations tracing_total_entries_fops = {
7264 .open = tracing_open_generic_tr,
7265 .read = tracing_total_entries_read,
7266 .llseek = generic_file_llseek,
7267 .release = tracing_release_generic_tr,
7268 };
7269
7270 static const struct file_operations tracing_free_buffer_fops = {
7271 .open = tracing_open_generic_tr,
7272 .write = tracing_free_buffer_write,
7273 .release = tracing_free_buffer_release,
7274 };
7275
7276 static const struct file_operations tracing_mark_fops = {
7277 .open = tracing_open_generic_tr,
7278 .write = tracing_mark_write,
7279 .llseek = generic_file_llseek,
7280 .release = tracing_release_generic_tr,
7281 };
7282
7283 static const struct file_operations tracing_mark_raw_fops = {
7284 .open = tracing_open_generic_tr,
7285 .write = tracing_mark_raw_write,
7286 .llseek = generic_file_llseek,
7287 .release = tracing_release_generic_tr,
7288 };
7289
7290 static const struct file_operations trace_clock_fops = {
7291 .open = tracing_clock_open,
7292 .read = seq_read,
7293 .llseek = seq_lseek,
7294 .release = tracing_single_release_tr,
7295 .write = tracing_clock_write,
7296 };
7297
7298 static const struct file_operations trace_time_stamp_mode_fops = {
7299 .open = tracing_time_stamp_mode_open,
7300 .read = seq_read,
7301 .llseek = seq_lseek,
7302 .release = tracing_single_release_tr,
7303 };
7304
7305 #ifdef CONFIG_TRACER_SNAPSHOT
7306 static const struct file_operations snapshot_fops = {
7307 .open = tracing_snapshot_open,
7308 .read = seq_read,
7309 .write = tracing_snapshot_write,
7310 .llseek = tracing_lseek,
7311 .release = tracing_snapshot_release,
7312 };
7313
7314 static const struct file_operations snapshot_raw_fops = {
7315 .open = snapshot_raw_open,
7316 .read = tracing_buffers_read,
7317 .release = tracing_buffers_release,
7318 .splice_read = tracing_buffers_splice_read,
7319 .llseek = no_llseek,
7320 };
7321
7322 #endif /* CONFIG_TRACER_SNAPSHOT */
7323
7324 #define TRACING_LOG_ERRS_MAX 8
7325 #define TRACING_LOG_LOC_MAX 128
7326
7327 #define CMD_PREFIX " Command: "
7328
7329 struct err_info {
7330 const char **errs; /* ptr to loc-specific array of err strings */
7331 u8 type; /* index into errs -> specific err string */
7332 u8 pos; /* MAX_FILTER_STR_VAL = 256 */
7333 u64 ts;
7334 };
7335
7336 struct tracing_log_err {
7337 struct list_head list;
7338 struct err_info info;
7339 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7340 char cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7341 };
7342
7343 static DEFINE_MUTEX(tracing_err_log_lock);
7344
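/* Get a new err_log entry; once TRACING_LOG_ERRS_MAX entries exist, recycle the oldest one */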
7345 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7346 {
7347 struct tracing_log_err *err;
7348
7349 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7350 err = kzalloc(sizeof(*err), GFP_KERNEL);
7351 if (!err)
7352 err = ERR_PTR(-ENOMEM);
7353 else
7354 tr->n_err_log_entries++;
7355
7356 return err;
7357 }
7358
7359 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7360 list_del(&err->list);
7361
7362 return err;
7363 }
7364
7365 /**
7366 * err_pos - find the position of a string within a command for error careting
7367 * @cmd: The tracing command that caused the error
7368 * @str: The string to position the caret at within @cmd
7369 *
7370 * Finds the position of the first occurrence of @str within @cmd. The
7371 * return value can be passed to tracing_log_err() for caret placement
7372 * within @cmd.
7373 *
7374 * Returns the index within @cmd of the first occurrence of @str or 0
7375 * if @str was not found.
7376 */
7377 unsigned int err_pos(char *cmd, const char *str)
7378 {
7379 char *found;
7380
7381 if (WARN_ON(!strlen(cmd)))
7382 return 0;
7383
7384 found = strstr(cmd, str);
7385 if (found)
7386 return found - cmd;
7387
7388 return 0;
7389 }
7390
7391 /**
7392 * tracing_log_err - write an error to the tracing error log
7393 * @tr: The associated trace array for the error (NULL for top level array)
7394 * @loc: A string describing where the error occurred
7395 * @cmd: The tracing command that caused the error
7396 * @errs: The array of loc-specific static error strings
7397 * @type: The index into errs[], which produces the specific static err string
7398 * @pos: The position the caret should be placed in the cmd
7399 *
7400 * Writes an error into tracing/error_log of the form:
7401 *
7402 * <loc>: error: <text>
7403 * Command: <cmd>
7404 * ^
7405 *
7406 * tracing/error_log is a small log file containing the last
7407 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7408 * unless there has been a tracing error, and the error log can be
7409 * cleared and have its memory freed by writing the empty string in
7410 * truncation mode to it i.e. echo > tracing/error_log.
7411 *
7412 * NOTE: the @errs array along with the @type param are used to
7413 * produce a static error string - this string is not copied and saved
7414 * when the error is logged - only a pointer to it is saved. See
7415 * existing callers for examples of how static strings are typically
7416 * defined for use with tracing_log_err().
7417 */
7418 void tracing_log_err(struct trace_array *tr,
7419 const char *loc, const char *cmd,
7420 const char **errs, u8 type, u8 pos)
7421 {
7422 struct tracing_log_err *err;
7423
7424 if (!tr)
7425 tr = &global_trace;
7426
7427 mutex_lock(&tracing_err_log_lock);
7428 err = get_tracing_log_err(tr);
7429 if (PTR_ERR(err) == -ENOMEM) {
7430 mutex_unlock(&tracing_err_log_lock);
7431 return;
7432 }
7433
7434 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7435 snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7436
7437 err->info.errs = errs;
7438 err->info.type = type;
7439 err->info.pos = pos;
7440 err->info.ts = local_clock();
7441
7442 list_add_tail(&err->list, &tr->err_log);
7443 mutex_unlock(&tracing_err_log_lock);
7444 }
7445
7446 static void clear_tracing_err_log(struct trace_array *tr)
7447 {
7448 struct tracing_log_err *err, *next;
7449
7450 mutex_lock(&tracing_err_log_lock);
7451 list_for_each_entry_safe(err, next, &tr->err_log, list) {
7452 list_del(&err->list);
7453 kfree(err);
7454 }
7455
7456 tr->n_err_log_entries = 0;
7457 mutex_unlock(&tracing_err_log_lock);
7458 }
7459
7460 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7461 {
7462 struct trace_array *tr = m->private;
7463
7464 mutex_lock(&tracing_err_log_lock);
7465
7466 return seq_list_start(&tr->err_log, *pos);
7467 }
7468
7469 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7470 {
7471 struct trace_array *tr = m->private;
7472
7473 return seq_list_next(v, &tr->err_log, pos);
7474 }
7475
7476 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7477 {
7478 mutex_unlock(&tracing_err_log_lock);
7479 }
7480
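/* Pad with spaces so the '^' caret lands under the offending position within the logged command */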
7481 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7482 {
7483 u8 i;
7484
7485 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7486 seq_putc(m, ' ');
7487 for (i = 0; i < pos; i++)
7488 seq_putc(m, ' ');
7489 seq_puts(m, "^\n");
7490 }
7491
7492 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7493 {
7494 struct tracing_log_err *err = v;
7495
7496 if (err) {
7497 const char *err_text = err->info.errs[err->info.type];
7498 u64 sec = err->info.ts;
7499 u32 nsec;
7500
7501 nsec = do_div(sec, NSEC_PER_SEC);
7502 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7503 err->loc, err_text);
7504 seq_printf(m, "%s", err->cmd);
7505 tracing_err_log_show_pos(m, err->info.pos);
7506 }
7507
7508 return 0;
7509 }
7510
7511 static const struct seq_operations tracing_err_log_seq_ops = {
7512 .start = tracing_err_log_seq_start,
7513 .next = tracing_err_log_seq_next,
7514 .stop = tracing_err_log_seq_stop,
7515 .show = tracing_err_log_seq_show
7516 };
7517
7518 static int tracing_err_log_open(struct inode *inode, struct file *file)
7519 {
7520 struct trace_array *tr = inode->i_private;
7521 int ret = 0;
7522
7523 ret = tracing_check_open_get_tr(tr);
7524 if (ret)
7525 return ret;
7526
7527 /* If this file was opened for write, then erase contents */
7528 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7529 clear_tracing_err_log(tr);
7530
7531 if (file->f_mode & FMODE_READ) {
7532 ret = seq_open(file, &tracing_err_log_seq_ops);
7533 if (!ret) {
7534 struct seq_file *m = file->private_data;
7535 m->private = tr;
7536 } else {
7537 trace_array_put(tr);
7538 }
7539 }
7540 return ret;
7541 }
7542
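/* Writes to error_log are ignored; the log is cleared by opening the file with O_TRUNC (see tracing_err_log_open()) */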
7543 static ssize_t tracing_err_log_write(struct file *file,
7544 const char __user *buffer,
7545 size_t count, loff_t *ppos)
7546 {
7547 return count;
7548 }
7549
7550 static int tracing_err_log_release(struct inode *inode, struct file *file)
7551 {
7552 struct trace_array *tr = inode->i_private;
7553
7554 trace_array_put(tr);
7555
7556 if (file->f_mode & FMODE_READ)
7557 seq_release(inode, file);
7558
7559 return 0;
7560 }
7561
7562 static const struct file_operations tracing_err_log_fops = {
7563 .open = tracing_err_log_open,
7564 .write = tracing_err_log_write,
7565 .read = seq_read,
7566 .llseek = tracing_lseek,
7567 .release = tracing_err_log_release,
7568 };
7569
7570 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7571 {
7572 struct trace_array *tr = inode->i_private;
7573 struct ftrace_buffer_info *info;
7574 int ret;
7575
7576 ret = tracing_check_open_get_tr(tr);
7577 if (ret)
7578 return ret;
7579
7580 info = kvzalloc(sizeof(*info), GFP_KERNEL);
7581 if (!info) {
7582 trace_array_put(tr);
7583 return -ENOMEM;
7584 }
7585
7586 mutex_lock(&trace_types_lock);
7587
7588 info->iter.tr = tr;
7589 info->iter.cpu_file = tracing_get_cpu(inode);
7590 info->iter.trace = tr->current_trace;
7591 info->iter.array_buffer = &tr->array_buffer;
7592 info->spare = NULL;
7593 /* Force reading ring buffer for first read */
7594 info->read = (unsigned int)-1;
7595
7596 filp->private_data = info;
7597
7598 tr->trace_ref++;
7599
7600 mutex_unlock(&trace_types_lock);
7601
7602 ret = nonseekable_open(inode, filp);
7603 if (ret < 0)
7604 trace_array_put(tr);
7605
7606 return ret;
7607 }
7608
7609 static __poll_t
7610 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7611 {
7612 struct ftrace_buffer_info *info = filp->private_data;
7613 struct trace_iterator *iter = &info->iter;
7614
7615 return trace_poll(iter, filp, poll_table);
7616 }
7617
7618 static ssize_t
7619 tracing_buffers_read(struct file *filp, char __user *ubuf,
7620 size_t count, loff_t *ppos)
7621 {
7622 struct ftrace_buffer_info *info = filp->private_data;
7623 struct trace_iterator *iter = &info->iter;
7624 ssize_t ret = 0;
7625 ssize_t size;
7626
7627 if (!count)
7628 return 0;
7629
7630 #ifdef CONFIG_TRACER_MAX_TRACE
7631 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7632 return -EBUSY;
7633 #endif
7634
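/* Lazily allocate a spare page for ring_buffer_read_page() to fill or swap with a ring buffer page */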
7635 if (!info->spare) {
7636 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7637 iter->cpu_file);
7638 if (IS_ERR(info->spare)) {
7639 ret = PTR_ERR(info->spare);
7640 info->spare = NULL;
7641 } else {
7642 info->spare_cpu = iter->cpu_file;
7643 }
7644 }
7645 if (!info->spare)
7646 return ret;
7647
7648 /* Do we have previous read data to read? */
7649 if (info->read < PAGE_SIZE)
7650 goto read;
7651
7652 again:
7653 trace_access_lock(iter->cpu_file);
7654 ret = ring_buffer_read_page(iter->array_buffer->buffer,
7655 &info->spare,
7656 count,
7657 iter->cpu_file, 0);
7658 trace_access_unlock(iter->cpu_file);
7659
7660 if (ret < 0) {
7661 if (trace_empty(iter)) {
7662 if ((filp->f_flags & O_NONBLOCK))
7663 return -EAGAIN;
7664
7665 ret = wait_on_pipe(iter, 0);
7666 if (ret)
7667 return ret;
7668
7669 goto again;
7670 }
7671 return 0;
7672 }
7673
7674 info->read = 0;
7675 read:
7676 size = PAGE_SIZE - info->read;
7677 if (size > count)
7678 size = count;
7679
7680 ret = copy_to_user(ubuf, info->spare + info->read, size);
7681 if (ret == size)
7682 return -EFAULT;
7683
7684 size -= ret;
7685
7686 *ppos += size;
7687 info->read += size;
7688
7689 return size;
7690 }
7691
7692 static int tracing_buffers_release(struct inode *inode, struct file *file)
7693 {
7694 struct ftrace_buffer_info *info = file->private_data;
7695 struct trace_iterator *iter = &info->iter;
7696
7697 mutex_lock(&trace_types_lock);
7698
7699 iter->tr->trace_ref--;
7700
7701 __trace_array_put(iter->tr);
7702
7703 if (info->spare)
7704 ring_buffer_free_read_page(iter->array_buffer->buffer,
7705 info->spare_cpu, info->spare);
7706 kvfree(info);
7707
7708 mutex_unlock(&trace_types_lock);
7709
7710 return 0;
7711 }
7712
7713 struct buffer_ref {
7714 struct trace_buffer *buffer;
7715 void *page;
7716 int cpu;
7717 refcount_t refcount;
7718 };
7719
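/* Drop a reference to a read page; the page and the ref are freed when the last reference goes away */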
7720 static void buffer_ref_release(struct buffer_ref *ref)
7721 {
7722 if (!refcount_dec_and_test(&ref->refcount))
7723 return;
7724 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7725 kfree(ref);
7726 }
7727
7728 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7729 struct pipe_buffer *buf)
7730 {
7731 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7732
7733 buffer_ref_release(ref);
7734 buf->private = 0;
7735 }
7736
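/* Take an extra reference for a page sitting in a pipe; refuse once the refcount gets close to overflowing */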
7737 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7738 struct pipe_buffer *buf)
7739 {
7740 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7741
7742 if (refcount_read(&ref->refcount) > INT_MAX/2)
7743 return false;
7744
7745 refcount_inc(&ref->refcount);
7746 return true;
7747 }
7748
7749 /* Pipe buffer operations for a buffer. */
7750 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7751 .release = buffer_pipe_buf_release,
7752 .get = buffer_pipe_buf_get,
7753 };
7754
7755 /*
7756 * Callback from splice_to_pipe(), if we need to release some pages
7757 * at the end of the spd in case we error'ed out in filling the pipe.
7758 */
7759 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7760 {
7761 struct buffer_ref *ref =
7762 (struct buffer_ref *)spd->partial[i].private;
7763
7764 buffer_ref_release(ref);
7765 spd->partial[i].private = 0;
7766 }
7767
7768 static ssize_t
7769 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7770 struct pipe_inode_info *pipe, size_t len,
7771 unsigned int flags)
7772 {
7773 struct ftrace_buffer_info *info = file->private_data;
7774 struct trace_iterator *iter = &info->iter;
7775 struct partial_page partial_def[PIPE_DEF_BUFFERS];
7776 struct page *pages_def[PIPE_DEF_BUFFERS];
7777 struct splice_pipe_desc spd = {
7778 .pages = pages_def,
7779 .partial = partial_def,
7780 .nr_pages_max = PIPE_DEF_BUFFERS,
7781 .ops = &buffer_pipe_buf_ops,
7782 .spd_release = buffer_spd_release,
7783 };
7784 struct buffer_ref *ref;
7785 int entries, i;
7786 ssize_t ret = 0;
7787
7788 #ifdef CONFIG_TRACER_MAX_TRACE
7789 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7790 return -EBUSY;
7791 #endif
7792
7793 if (*ppos & (PAGE_SIZE - 1))
7794 return -EINVAL;
7795
7796 if (len & (PAGE_SIZE - 1)) {
7797 if (len < PAGE_SIZE)
7798 return -EINVAL;
7799 len &= PAGE_MASK;
7800 }
7801
7802 if (splice_grow_spd(pipe, &spd))
7803 return -ENOMEM;
7804
7805 again:
7806 trace_access_lock(iter->cpu_file);
7807 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7808
7809 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7810 struct page *page;
7811 int r;
7812
7813 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7814 if (!ref) {
7815 ret = -ENOMEM;
7816 break;
7817 }
7818
7819 refcount_set(&ref->refcount, 1);
7820 ref->buffer = iter->array_buffer->buffer;
7821 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7822 if (IS_ERR(ref->page)) {
7823 ret = PTR_ERR(ref->page);
7824 ref->page = NULL;
7825 kfree(ref);
7826 break;
7827 }
7828 ref->cpu = iter->cpu_file;
7829
7830 r = ring_buffer_read_page(ref->buffer, &ref->page,
7831 len, iter->cpu_file, 1);
7832 if (r < 0) {
7833 ring_buffer_free_read_page(ref->buffer, ref->cpu,
7834 ref->page);
7835 kfree(ref);
7836 break;
7837 }
7838
7839 page = virt_to_page(ref->page);
7840
7841 spd.pages[i] = page;
7842 spd.partial[i].len = PAGE_SIZE;
7843 spd.partial[i].offset = 0;
7844 spd.partial[i].private = (unsigned long)ref;
7845 spd.nr_pages++;
7846 *ppos += PAGE_SIZE;
7847
7848 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7849 }
7850
7851 trace_access_unlock(iter->cpu_file);
7852 spd.nr_pages = i;
7853
7854 /* did we read anything? */
7855 if (!spd.nr_pages) {
7856 if (ret)
7857 goto out;
7858
7859 ret = -EAGAIN;
7860 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7861 goto out;
7862
7863 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
7864 if (ret)
7865 goto out;
7866
7867 goto again;
7868 }
7869
7870 ret = splice_to_pipe(pipe, &spd);
7871 out:
7872 splice_shrink_spd(&spd);
7873
7874 return ret;
7875 }
7876
7877 static const struct file_operations tracing_buffers_fops = {
7878 .open = tracing_buffers_open,
7879 .read = tracing_buffers_read,
7880 .poll = tracing_buffers_poll,
7881 .release = tracing_buffers_release,
7882 .splice_read = tracing_buffers_splice_read,
7883 .llseek = no_llseek,
7884 };
7885
7886 static ssize_t
7887 tracing_stats_read(struct file *filp, char __user *ubuf,
7888 size_t count, loff_t *ppos)
7889 {
7890 struct inode *inode = file_inode(filp);
7891 struct trace_array *tr = inode->i_private;
7892 struct array_buffer *trace_buf = &tr->array_buffer;
7893 int cpu = tracing_get_cpu(inode);
7894 struct trace_seq *s;
7895 unsigned long cnt;
7896 unsigned long long t;
7897 unsigned long usec_rem;
7898
7899 s = kmalloc(sizeof(*s), GFP_KERNEL);
7900 if (!s)
7901 return -ENOMEM;
7902
7903 trace_seq_init(s);
7904
7905 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7906 trace_seq_printf(s, "entries: %ld\n", cnt);
7907
7908 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7909 trace_seq_printf(s, "overrun: %ld\n", cnt);
7910
7911 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7912 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7913
7914 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7915 trace_seq_printf(s, "bytes: %ld\n", cnt);
7916
7917 if (trace_clocks[tr->clock_id].in_ns) {
7918 /* local or global for trace_clock */
7919 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7920 usec_rem = do_div(t, USEC_PER_SEC);
7921 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7922 t, usec_rem);
7923
7924 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7925 usec_rem = do_div(t, USEC_PER_SEC);
7926 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7927 } else {
7928 /* counter or tsc mode for trace_clock */
7929 trace_seq_printf(s, "oldest event ts: %llu\n",
7930 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7931
7932 trace_seq_printf(s, "now ts: %llu\n",
7933 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7934 }
7935
7936 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7937 trace_seq_printf(s, "dropped events: %ld\n", cnt);
7938
7939 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7940 trace_seq_printf(s, "read events: %ld\n", cnt);
7941
7942 count = simple_read_from_buffer(ubuf, count, ppos,
7943 s->buffer, trace_seq_used(s));
7944
7945 kfree(s);
7946
7947 return count;
7948 }
7949
7950 static const struct file_operations tracing_stats_fops = {
7951 .open = tracing_open_generic_tr,
7952 .read = tracing_stats_read,
7953 .llseek = generic_file_llseek,
7954 .release = tracing_release_generic_tr,
7955 };
7956
7957 #ifdef CONFIG_DYNAMIC_FTRACE
7958
7959 static ssize_t
7960 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7961 size_t cnt, loff_t *ppos)
7962 {
7963 ssize_t ret;
7964 char *buf;
7965 int r;
7966
7967 /* 256 should be plenty to hold the amount needed */
7968 buf = kmalloc(256, GFP_KERNEL);
7969 if (!buf)
7970 return -ENOMEM;
7971
7972 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7973 ftrace_update_tot_cnt,
7974 ftrace_number_of_pages,
7975 ftrace_number_of_groups);
7976
7977 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7978 kfree(buf);
7979 return ret;
7980 }
7981
7982 static const struct file_operations tracing_dyn_info_fops = {
7983 .open = tracing_open_generic,
7984 .read = tracing_read_dyn_info,
7985 .llseek = generic_file_llseek,
7986 };
7987 #endif /* CONFIG_DYNAMIC_FTRACE */
7988
7989 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7990 static void
7991 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7992 struct trace_array *tr, struct ftrace_probe_ops *ops,
7993 void *data)
7994 {
7995 tracing_snapshot_instance(tr);
7996 }
7997
7998 static void
7999 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8000 struct trace_array *tr, struct ftrace_probe_ops *ops,
8001 void *data)
8002 {
8003 struct ftrace_func_mapper *mapper = data;
8004 long *count = NULL;
8005
8006 if (mapper)
8007 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8008
8009 if (count) {
8010
8011 if (*count <= 0)
8012 return;
8013
8014 (*count)--;
8015 }
8016
8017 tracing_snapshot_instance(tr);
8018 }
8019
8020 static int
8021 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8022 struct ftrace_probe_ops *ops, void *data)
8023 {
8024 struct ftrace_func_mapper *mapper = data;
8025 long *count = NULL;
8026
8027 seq_printf(m, "%ps:", (void *)ip);
8028
8029 seq_puts(m, "snapshot");
8030
8031 if (mapper)
8032 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8033
8034 if (count)
8035 seq_printf(m, ":count=%ld\n", *count);
8036 else
8037 seq_puts(m, ":unlimited\n");
8038
8039 return 0;
8040 }
8041
8042 static int
8043 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8044 unsigned long ip, void *init_data, void **data)
8045 {
8046 struct ftrace_func_mapper *mapper = *data;
8047
8048 if (!mapper) {
8049 mapper = allocate_ftrace_func_mapper();
8050 if (!mapper)
8051 return -ENOMEM;
8052 *data = mapper;
8053 }
8054
8055 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8056 }
8057
8058 static void
8059 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8060 unsigned long ip, void *data)
8061 {
8062 struct ftrace_func_mapper *mapper = data;
8063
8064 if (!ip) {
8065 if (!mapper)
8066 return;
8067 free_ftrace_func_mapper(mapper, NULL);
8068 return;
8069 }
8070
8071 ftrace_func_mapper_remove_ip(mapper, ip);
8072 }
8073
8074 static struct ftrace_probe_ops snapshot_probe_ops = {
8075 .func = ftrace_snapshot,
8076 .print = ftrace_snapshot_print,
8077 };
8078
8079 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8080 .func = ftrace_count_snapshot,
8081 .print = ftrace_snapshot_print,
8082 .init = ftrace_snapshot_init,
8083 .free = ftrace_snapshot_free,
8084 };
8085
8086 static int
8087 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8088 char *glob, char *cmd, char *param, int enable)
8089 {
8090 struct ftrace_probe_ops *ops;
8091 void *count = (void *)-1;
8092 char *number;
8093 int ret;
8094
8095 if (!tr)
8096 return -ENODEV;
8097
8098 /* hash funcs only work with set_ftrace_filter */
8099 if (!enable)
8100 return -EINVAL;
8101
8102 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8103
8104 if (glob[0] == '!')
8105 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8106
8107 if (!param)
8108 goto out_reg;
8109
8110 number = strsep(&param, ":");
8111
8112 if (!strlen(number))
8113 goto out_reg;
8114
8115 /*
8116 * We use the callback data field (which is a pointer)
8117 * as our counter.
8118 */
8119 ret = kstrtoul(number, 0, (unsigned long *)&count);
8120 if (ret)
8121 return ret;
8122
8123 out_reg:
8124 ret = tracing_alloc_snapshot_instance(tr);
8125 if (ret < 0)
8126 goto out;
8127
8128 ret = register_ftrace_function_probe(glob, tr, ops, count);
8129
8130 out:
8131 return ret < 0 ? ret : 0;
8132 }
8133
8134 static struct ftrace_func_command ftrace_snapshot_cmd = {
8135 .name = "snapshot",
8136 .func = ftrace_trace_snapshot_callback,
8137 };
8138
8139 static __init int register_snapshot_cmd(void)
8140 {
8141 return register_ftrace_command(&ftrace_snapshot_cmd);
8142 }
8143 #else
8144 static inline __init int register_snapshot_cmd(void) { return 0; }
8145 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8146
8147 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8148 {
8149 if (WARN_ON(!tr->dir))
8150 return ERR_PTR(-ENODEV);
8151
8152 /* Top directory uses NULL as the parent */
8153 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8154 return NULL;
8155
8156 /* All sub buffers have a descriptor */
8157 return tr->dir;
8158 }
8159
8160 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8161 {
8162 struct dentry *d_tracer;
8163
8164 if (tr->percpu_dir)
8165 return tr->percpu_dir;
8166
8167 d_tracer = tracing_get_dentry(tr);
8168 if (IS_ERR(d_tracer))
8169 return NULL;
8170
8171 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8172
8173 MEM_FAIL(!tr->percpu_dir,
8174 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8175
8176 return tr->percpu_dir;
8177 }
8178
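/* Like trace_create_file(), but records the CPU in i_cdev as cpu + 1 so CPU 0 is not mistaken for "no CPU" (see tracing_get_cpu()) */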
8179 static struct dentry *
8180 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8181 void *data, long cpu, const struct file_operations *fops)
8182 {
8183 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8184
8185 if (ret) /* See tracing_get_cpu() */
8186 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8187 return ret;
8188 }
8189
8190 static void
8191 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8192 {
8193 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8194 struct dentry *d_cpu;
8195 char cpu_dir[30]; /* 30 characters should be more than enough */
8196
8197 if (!d_percpu)
8198 return;
8199
8200 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8201 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8202 if (!d_cpu) {
8203 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8204 return;
8205 }
8206
8207 /* per cpu trace_pipe */
8208 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8209 tr, cpu, &tracing_pipe_fops);
8210
8211 /* per cpu trace */
8212 trace_create_cpu_file("trace", 0644, d_cpu,
8213 tr, cpu, &tracing_fops);
8214
8215 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8216 tr, cpu, &tracing_buffers_fops);
8217
8218 trace_create_cpu_file("stats", 0444, d_cpu,
8219 tr, cpu, &tracing_stats_fops);
8220
8221 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8222 tr, cpu, &tracing_entries_fops);
8223
8224 #ifdef CONFIG_TRACER_SNAPSHOT
8225 trace_create_cpu_file("snapshot", 0644, d_cpu,
8226 tr, cpu, &snapshot_fops);
8227
8228 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8229 tr, cpu, &snapshot_raw_fops);
8230 #endif
8231 }
8232
8233 #ifdef CONFIG_FTRACE_SELFTEST
8234 /* Let selftest have access to static functions in this file */
8235 #include "trace_selftest.c"
8236 #endif
8237
8238 static ssize_t
8239 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8240 loff_t *ppos)
8241 {
8242 struct trace_option_dentry *topt = filp->private_data;
8243 char *buf;
8244
8245 if (topt->flags->val & topt->opt->bit)
8246 buf = "1\n";
8247 else
8248 buf = "0\n";
8249
8250 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8251 }
8252
8253 static ssize_t
8254 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8255 loff_t *ppos)
8256 {
8257 struct trace_option_dentry *topt = filp->private_data;
8258 unsigned long val;
8259 int ret;
8260
8261 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8262 if (ret)
8263 return ret;
8264
8265 if (val != 0 && val != 1)
8266 return -EINVAL;
8267
8268 if (!!(topt->flags->val & topt->opt->bit) != val) {
8269 mutex_lock(&trace_types_lock);
8270 ret = __set_tracer_option(topt->tr, topt->flags,
8271 topt->opt, !val);
8272 mutex_unlock(&trace_types_lock);
8273 if (ret)
8274 return ret;
8275 }
8276
8277 *ppos += cnt;
8278
8279 return cnt;
8280 }
8281
8282 static int tracing_open_options(struct inode *inode, struct file *filp)
8283 {
8284 struct trace_option_dentry *topt = inode->i_private;
8285 int ret;
8286
8287 ret = tracing_check_open_get_tr(topt->tr);
8288 if (ret)
8289 return ret;
8290
8291 filp->private_data = inode->i_private;
8292 return 0;
8293 }
8294
8295 static int tracing_release_options(struct inode *inode, struct file *file)
8296 {
8297 struct trace_option_dentry *topt = file->private_data;
8298
8299 trace_array_put(topt->tr);
8300 return 0;
8301 }
8302
8303 static const struct file_operations trace_options_fops = {
8304 .open = tracing_open_options,
8305 .read = trace_options_read,
8306 .write = trace_options_write,
8307 .llseek = generic_file_llseek,
8308 .release = tracing_release_options,
8309 };
8310
8311 /*
8312 * In order to pass in both the trace_array descriptor as well as the index
8313 * to the flag that the trace option file represents, the trace_array
8314 * has a character array of trace_flags_index[], which holds the index
8315 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8316 * The address of this character array is passed to the flag option file
8317 * read/write callbacks.
8318 *
8319 * In order to extract both the index and the trace_array descriptor,
8320 * get_tr_index() uses the following algorithm.
8321 *
8322 * idx = *ptr;
8323 *
8324 * As the pointer itself contains the address of the index (remember
8325 * index[1] == 1).
8326 *
8327 * Then to get the trace_array descriptor, by subtracting that index
8328 * from the ptr, we get to the start of the index itself.
8329 *
8330 * ptr - idx == &index[0]
8331 *
8332 * Then a simple container_of() from that pointer gets us to the
8333 * trace_array descriptor.
8334 */
8335 static void get_tr_index(void *data, struct trace_array **ptr,
8336 unsigned int *pindex)
8337 {
8338 *pindex = *(unsigned char *)data;
8339
8340 *ptr = container_of(data - *pindex, struct trace_array,
8341 trace_flags_index);
8342 }
8343
8344 static ssize_t
8345 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8346 loff_t *ppos)
8347 {
8348 void *tr_index = filp->private_data;
8349 struct trace_array *tr;
8350 unsigned int index;
8351 char *buf;
8352
8353 get_tr_index(tr_index, &tr, &index);
8354
8355 if (tr->trace_flags & (1 << index))
8356 buf = "1\n";
8357 else
8358 buf = "0\n";
8359
8360 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8361 }
8362
8363 static ssize_t
8364 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8365 loff_t *ppos)
8366 {
8367 void *tr_index = filp->private_data;
8368 struct trace_array *tr;
8369 unsigned int index;
8370 unsigned long val;
8371 int ret;
8372
8373 get_tr_index(tr_index, &tr, &index);
8374
8375 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8376 if (ret)
8377 return ret;
8378
8379 if (val != 0 && val != 1)
8380 return -EINVAL;
8381
8382 mutex_lock(&event_mutex);
8383 mutex_lock(&trace_types_lock);
8384 ret = set_tracer_flag(tr, 1 << index, val);
8385 mutex_unlock(&trace_types_lock);
8386 mutex_unlock(&event_mutex);
8387
8388 if (ret < 0)
8389 return ret;
8390
8391 *ppos += cnt;
8392
8393 return cnt;
8394 }
8395
8396 static const struct file_operations trace_options_core_fops = {
8397 .open = tracing_open_generic,
8398 .read = trace_options_core_read,
8399 .write = trace_options_core_write,
8400 .llseek = generic_file_llseek,
8401 };
8402
8403 struct dentry *trace_create_file(const char *name,
8404 umode_t mode,
8405 struct dentry *parent,
8406 void *data,
8407 const struct file_operations *fops)
8408 {
8409 struct dentry *ret;
8410
8411 ret = tracefs_create_file(name, mode, parent, data, fops);
8412 if (!ret)
8413 pr_warn("Could not create tracefs '%s' entry\n", name);
8414
8415 return ret;
8416 }
8417
8418
8419 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8420 {
8421 struct dentry *d_tracer;
8422
8423 if (tr->options)
8424 return tr->options;
8425
8426 d_tracer = tracing_get_dentry(tr);
8427 if (IS_ERR(d_tracer))
8428 return NULL;
8429
8430 tr->options = tracefs_create_dir("options", d_tracer);
8431 if (!tr->options) {
8432 pr_warn("Could not create tracefs directory 'options'\n");
8433 return NULL;
8434 }
8435
8436 return tr->options;
8437 }
8438
8439 static void
8440 create_trace_option_file(struct trace_array *tr,
8441 struct trace_option_dentry *topt,
8442 struct tracer_flags *flags,
8443 struct tracer_opt *opt)
8444 {
8445 struct dentry *t_options;
8446
8447 t_options = trace_options_init_dentry(tr);
8448 if (!t_options)
8449 return;
8450
8451 topt->flags = flags;
8452 topt->opt = opt;
8453 topt->tr = tr;
8454
8455 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8456 &trace_options_fops);
8457
8458 }
8459
8460 static void
8461 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8462 {
8463 struct trace_option_dentry *topts;
8464 struct trace_options *tr_topts;
8465 struct tracer_flags *flags;
8466 struct tracer_opt *opts;
8467 int cnt;
8468 int i;
8469
8470 if (!tracer)
8471 return;
8472
8473 flags = tracer->flags;
8474
8475 if (!flags || !flags->opts)
8476 return;
8477
8478 /*
8479 * If this is an instance, only create flags for tracers
8480 * the instance may have.
8481 */
8482 if (!trace_ok_for_array(tracer, tr))
8483 return;
8484
8485 for (i = 0; i < tr->nr_topts; i++) {
8486 /* Make sure there are no duplicate flags. */
8487 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8488 return;
8489 }
8490
8491 opts = flags->opts;
8492
8493 for (cnt = 0; opts[cnt].name; cnt++)
8494 ;
8495
8496 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8497 if (!topts)
8498 return;
8499
8500 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8501 GFP_KERNEL);
8502 if (!tr_topts) {
8503 kfree(topts);
8504 return;
8505 }
8506
8507 tr->topts = tr_topts;
8508 tr->topts[tr->nr_topts].tracer = tracer;
8509 tr->topts[tr->nr_topts].topts = topts;
8510 tr->nr_topts++;
8511
8512 for (cnt = 0; opts[cnt].name; cnt++) {
8513 create_trace_option_file(tr, &topts[cnt], flags,
8514 &opts[cnt]);
8515 MEM_FAIL(topts[cnt].entry == NULL,
8516 "Failed to create trace option: %s",
8517 opts[cnt].name);
8518 }
8519 }
8520
8521 static struct dentry *
8522 create_trace_option_core_file(struct trace_array *tr,
8523 const char *option, long index)
8524 {
8525 struct dentry *t_options;
8526
8527 t_options = trace_options_init_dentry(tr);
8528 if (!t_options)
8529 return NULL;
8530
8531 return trace_create_file(option, 0644, t_options,
8532 (void *)&tr->trace_flags_index[index],
8533 &trace_options_core_fops);
8534 }
8535
8536 static void create_trace_options_dir(struct trace_array *tr)
8537 {
8538 struct dentry *t_options;
8539 bool top_level = tr == &global_trace;
8540 int i;
8541
8542 t_options = trace_options_init_dentry(tr);
8543 if (!t_options)
8544 return;
8545
8546 for (i = 0; trace_options[i]; i++) {
8547 if (top_level ||
8548 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8549 create_trace_option_core_file(tr, trace_options[i], i);
8550 }
8551 }
8552
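/* "tracing_on" file: reading reports whether the ring buffer is currently enabled for this instance */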
8553 static ssize_t
8554 rb_simple_read(struct file *filp, char __user *ubuf,
8555 size_t cnt, loff_t *ppos)
8556 {
8557 struct trace_array *tr = filp->private_data;
8558 char buf[64];
8559 int r;
8560
8561 r = tracer_tracing_is_on(tr);
8562 r = sprintf(buf, "%d\n", r);
8563
8564 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8565 }
8566
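/* Writing 1/0 to "tracing_on" turns the ring buffer on/off and calls the current tracer's start/stop hooks */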
8567 static ssize_t
8568 rb_simple_write(struct file *filp, const char __user *ubuf,
8569 size_t cnt, loff_t *ppos)
8570 {
8571 struct trace_array *tr = filp->private_data;
8572 struct trace_buffer *buffer = tr->array_buffer.buffer;
8573 unsigned long val;
8574 int ret;
8575
8576 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8577 if (ret)
8578 return ret;
8579
8580 if (buffer) {
8581 mutex_lock(&trace_types_lock);
8582 if (!!val == tracer_tracing_is_on(tr)) {
8583 val = 0; /* do nothing */
8584 } else if (val) {
8585 tracer_tracing_on(tr);
8586 if (tr->current_trace->start)
8587 tr->current_trace->start(tr);
8588 } else {
8589 tracer_tracing_off(tr);
8590 if (tr->current_trace->stop)
8591 tr->current_trace->stop(tr);
8592 }
8593 mutex_unlock(&trace_types_lock);
8594 }
8595
8596 (*ppos)++;
8597
8598 return cnt;
8599 }
8600
8601 static const struct file_operations rb_simple_fops = {
8602 .open = tracing_open_generic_tr,
8603 .read = rb_simple_read,
8604 .write = rb_simple_write,
8605 .release = tracing_release_generic_tr,
8606 .llseek = default_llseek,
8607 };
8608
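/* "buffer_percent" sets how full the ring buffer must be before waiters on trace_pipe_raw are woken (see wait_on_pipe() in tracing_buffers_splice_read()) */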
8609 static ssize_t
8610 buffer_percent_read(struct file *filp, char __user *ubuf,
8611 size_t cnt, loff_t *ppos)
8612 {
8613 struct trace_array *tr = filp->private_data;
8614 char buf[64];
8615 int r;
8616
8617 r = tr->buffer_percent;
8618 r = sprintf(buf, "%d\n", r);
8619
8620 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8621 }
8622
8623 static ssize_t
8624 buffer_percent_write(struct file *filp, const char __user *ubuf,
8625 size_t cnt, loff_t *ppos)
8626 {
8627 struct trace_array *tr = filp->private_data;
8628 unsigned long val;
8629 int ret;
8630
8631 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8632 if (ret)
8633 return ret;
8634
8635 if (val > 100)
8636 return -EINVAL;
8637
8638 tr->buffer_percent = val;
8639
8640 (*ppos)++;
8641
8642 return cnt;
8643 }
8644
8645 static const struct file_operations buffer_percent_fops = {
8646 .open = tracing_open_generic_tr,
8647 .read = buffer_percent_read,
8648 .write = buffer_percent_write,
8649 .release = tracing_release_generic_tr,
8650 .llseek = default_llseek,
8651 };
8652
8653 static struct dentry *trace_instance_dir;
8654
8655 static void
8656 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8657
8658 static int
8659 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8660 {
8661 enum ring_buffer_flags rb_flags;
8662
8663 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8664
8665 buf->tr = tr;
8666
8667 buf->buffer = ring_buffer_alloc(size, rb_flags);
8668 if (!buf->buffer)
8669 return -ENOMEM;
8670
8671 buf->data = alloc_percpu(struct trace_array_cpu);
8672 if (!buf->data) {
8673 ring_buffer_free(buf->buffer);
8674 buf->buffer = NULL;
8675 return -ENOMEM;
8676 }
8677
8678 /* Allocate the first page for all buffers */
8679 set_buffer_entries(&tr->array_buffer,
8680 ring_buffer_size(tr->array_buffer.buffer, 0));
8681
8682 return 0;
8683 }
8684
8685 static int allocate_trace_buffers(struct trace_array *tr, int size)
8686 {
8687 int ret;
8688
8689 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8690 if (ret)
8691 return ret;
8692
8693 #ifdef CONFIG_TRACER_MAX_TRACE
8694 ret = allocate_trace_buffer(tr, &tr->max_buffer,
8695 allocate_snapshot ? size : 1);
8696 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8697 ring_buffer_free(tr->array_buffer.buffer);
8698 tr->array_buffer.buffer = NULL;
8699 free_percpu(tr->array_buffer.data);
8700 tr->array_buffer.data = NULL;
8701 return -ENOMEM;
8702 }
8703 tr->allocated_snapshot = allocate_snapshot;
8704
8705 /*
8706 * Only the top level trace array gets its snapshot allocated
8707 * from the kernel command line.
8708 */
8709 allocate_snapshot = false;
8710 #endif
8711
8712 return 0;
8713 }
8714
8715 static void free_trace_buffer(struct array_buffer *buf)
8716 {
8717 if (buf->buffer) {
8718 ring_buffer_free(buf->buffer);
8719 buf->buffer = NULL;
8720 free_percpu(buf->data);
8721 buf->data = NULL;
8722 }
8723 }
8724
8725 static void free_trace_buffers(struct trace_array *tr)
8726 {
8727 if (!tr)
8728 return;
8729
8730 free_trace_buffer(&tr->array_buffer);
8731
8732 #ifdef CONFIG_TRACER_MAX_TRACE
8733 free_trace_buffer(&tr->max_buffer);
8734 #endif
8735 }
8736
8737 static void init_trace_flags_index(struct trace_array *tr)
8738 {
8739 int i;
8740
8741 /* Used by the trace options files */
8742 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8743 tr->trace_flags_index[i] = i;
8744 }
8745
8746 static void __update_tracer_options(struct trace_array *tr)
8747 {
8748 struct tracer *t;
8749
8750 for (t = trace_types; t; t = t->next)
8751 add_tracer_options(tr, t);
8752 }
8753
8754 static void update_tracer_options(struct trace_array *tr)
8755 {
8756 mutex_lock(&trace_types_lock);
8757 tracer_options_updated = true;
8758 __update_tracer_options(tr);
8759 mutex_unlock(&trace_types_lock);
8760 }
8761
8762 /* Must have trace_types_lock held */
8763 struct trace_array *trace_array_find(const char *instance)
8764 {
8765 struct trace_array *tr, *found = NULL;
8766
8767 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8768 if (tr->name && strcmp(tr->name, instance) == 0) {
8769 found = tr;
8770 break;
8771 }
8772 }
8773
8774 return found;
8775 }
8776
8777 struct trace_array *trace_array_find_get(const char *instance)
8778 {
8779 struct trace_array *tr;
8780
8781 mutex_lock(&trace_types_lock);
8782 tr = trace_array_find(instance);
8783 if (tr)
8784 tr->ref++;
8785 mutex_unlock(&trace_types_lock);
8786
8787 return tr;
8788 }
8789
8790 static int trace_array_create_dir(struct trace_array *tr)
8791 {
8792 int ret;
8793
8794 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8795 if (!tr->dir)
8796 return -EINVAL;
8797
8798 ret = event_trace_add_tracer(tr->dir, tr);
8799 if (ret) {
8800 tracefs_remove(tr->dir);
8801 return ret;
8802 }
8803
8804 init_tracer_tracefs(tr, tr->dir);
8805 __update_tracer_options(tr);
8806
8807 return ret;
8808 }
8809
8810 static struct trace_array *trace_array_create(const char *name)
8811 {
8812 struct trace_array *tr;
8813 int ret;
8814
8815 ret = -ENOMEM;
8816 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8817 if (!tr)
8818 return ERR_PTR(ret);
8819
8820 tr->name = kstrdup(name, GFP_KERNEL);
8821 if (!tr->name)
8822 goto out_free_tr;
8823
8824 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8825 goto out_free_tr;
8826
8827 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8828
8829 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8830
8831 raw_spin_lock_init(&tr->start_lock);
8832
8833 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8834
8835 tr->current_trace = &nop_trace;
8836
8837 INIT_LIST_HEAD(&tr->systems);
8838 INIT_LIST_HEAD(&tr->events);
8839 INIT_LIST_HEAD(&tr->hist_vars);
8840 INIT_LIST_HEAD(&tr->err_log);
8841
8842 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8843 goto out_free_tr;
8844
8845 if (ftrace_allocate_ftrace_ops(tr) < 0)
8846 goto out_free_tr;
8847
8848 ftrace_init_trace_array(tr);
8849
8850 init_trace_flags_index(tr);
8851
8852 if (trace_instance_dir) {
8853 ret = trace_array_create_dir(tr);
8854 if (ret)
8855 goto out_free_tr;
8856 } else
8857 __trace_early_add_events(tr);
8858
8859 list_add(&tr->list, &ftrace_trace_arrays);
8860
8861 tr->ref++;
8862
8863 return tr;
8864
8865 out_free_tr:
8866 ftrace_free_ftrace_ops(tr);
8867 free_trace_buffers(tr);
8868 free_cpumask_var(tr->tracing_cpumask);
8869 kfree(tr->name);
8870 kfree(tr);
8871
8872 return ERR_PTR(ret);
8873 }
8874
8875 static int instance_mkdir(const char *name)
8876 {
8877 struct trace_array *tr;
8878 int ret;
8879
8880 mutex_lock(&event_mutex);
8881 mutex_lock(&trace_types_lock);
8882
8883 ret = -EEXIST;
8884 if (trace_array_find(name))
8885 goto out_unlock;
8886
8887 tr = trace_array_create(name);
8888
8889 ret = PTR_ERR_OR_ZERO(tr);
8890
8891 out_unlock:
8892 mutex_unlock(&trace_types_lock);
8893 mutex_unlock(&event_mutex);
8894 return ret;
8895 }
8896
8897 /**
8898 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8899 * @name: The name of the trace array to be looked up/created.
8900 *
8901 * Returns pointer to trace array with given name.
8902 * NULL, if it cannot be created.
8903 *
8904 * NOTE: This function increments the reference counter associated with the
8905 * trace array returned. This makes sure it cannot be freed while in use.
8906 * Use trace_array_put() once the trace array is no longer needed.
8907 * If the trace_array is to be freed, trace_array_destroy() needs to
8908 * be called after the trace_array_put(), or simply let user space delete
8909 * it from the tracefs instances directory. But until the
8910 * trace_array_put() is called, user space can not delete it.
8911 *
8912 */
8913 struct trace_array *trace_array_get_by_name(const char *name)
8914 {
8915 struct trace_array *tr;
8916
8917 mutex_lock(&event_mutex);
8918 mutex_lock(&trace_types_lock);
8919
8920 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8921 if (tr->name && strcmp(tr->name, name) == 0)
8922 goto out_unlock;
8923 }
8924
8925 tr = trace_array_create(name);
8926
8927 if (IS_ERR(tr))
8928 tr = NULL;
8929 out_unlock:
8930 if (tr)
8931 tr->ref++;
8932
8933 mutex_unlock(&trace_types_lock);
8934 mutex_unlock(&event_mutex);
8935 return tr;
8936 }
8937 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
8938
8939 static int __remove_instance(struct trace_array *tr)
8940 {
8941 int i;
8942
8943 /* Reference counter for a newly created trace array = 1. */
8944 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8945 return -EBUSY;
8946
8947 list_del(&tr->list);
8948
8949 /* Disable all the flags that were enabled coming in */
8950 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8951 if ((1 << i) & ZEROED_TRACE_FLAGS)
8952 set_tracer_flag(tr, 1 << i, 0);
8953 }
8954
8955 tracing_set_nop(tr);
8956 clear_ftrace_function_probes(tr);
8957 event_trace_del_tracer(tr);
8958 ftrace_clear_pids(tr);
8959 ftrace_destroy_function_files(tr);
8960 tracefs_remove(tr->dir);
8961 free_trace_buffers(tr);
8962 clear_tracing_err_log(tr);
8963
8964 for (i = 0; i < tr->nr_topts; i++) {
8965 kfree(tr->topts[i].topts);
8966 }
8967 kfree(tr->topts);
8968
8969 free_cpumask_var(tr->tracing_cpumask);
8970 kfree(tr->name);
8971 kfree(tr);
8972
8973 return 0;
8974 }
8975
8976 int trace_array_destroy(struct trace_array *this_tr)
8977 {
8978 struct trace_array *tr;
8979 int ret;
8980
8981 if (!this_tr)
8982 return -EINVAL;
8983
8984 mutex_lock(&event_mutex);
8985 mutex_lock(&trace_types_lock);
8986
8987 ret = -ENODEV;
8988
8989 /* Making sure trace array exists before destroying it. */
8990 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8991 if (tr == this_tr) {
8992 ret = __remove_instance(tr);
8993 break;
8994 }
8995 }
8996
8997 mutex_unlock(&trace_types_lock);
8998 mutex_unlock(&event_mutex);
8999
9000 return ret;
9001 }
9002 EXPORT_SYMBOL_GPL(trace_array_destroy);
9003
9004 static int instance_rmdir(const char *name)
9005 {
9006 struct trace_array *tr;
9007 int ret;
9008
9009 mutex_lock(&event_mutex);
9010 mutex_lock(&trace_types_lock);
9011
9012 ret = -ENODEV;
9013 tr = trace_array_find(name);
9014 if (tr)
9015 ret = __remove_instance(tr);
9016
9017 mutex_unlock(&trace_types_lock);
9018 mutex_unlock(&event_mutex);
9019
9020 return ret;
9021 }
9022
9023 static __init void create_trace_instances(struct dentry *d_tracer)
9024 {
9025 struct trace_array *tr;
9026
9027 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9028 instance_mkdir,
9029 instance_rmdir);
9030 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9031 return;
9032
9033 mutex_lock(&event_mutex);
9034 mutex_lock(&trace_types_lock);
9035
9036 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9037 if (!tr->name)
9038 continue;
9039 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9040 "Failed to create instance directory\n"))
9041 break;
9042 }
9043
9044 mutex_unlock(&trace_types_lock);
9045 mutex_unlock(&event_mutex);
9046 }
9047
9048 static void
9049 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9050 {
9051 struct trace_event_file *file;
9052 int cpu;
9053
9054 trace_create_file("available_tracers", 0444, d_tracer,
9055 tr, &show_traces_fops);
9056
9057 trace_create_file("current_tracer", 0644, d_tracer,
9058 tr, &set_tracer_fops);
9059
9060 trace_create_file("tracing_cpumask", 0644, d_tracer,
9061 tr, &tracing_cpumask_fops);
9062
9063 trace_create_file("trace_options", 0644, d_tracer,
9064 tr, &tracing_iter_fops);
9065
9066 trace_create_file("trace", 0644, d_tracer,
9067 tr, &tracing_fops);
9068
9069 trace_create_file("trace_pipe", 0444, d_tracer,
9070 tr, &tracing_pipe_fops);
9071
9072 trace_create_file("buffer_size_kb", 0644, d_tracer,
9073 tr, &tracing_entries_fops);
9074
9075 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9076 tr, &tracing_total_entries_fops);
9077
9078 trace_create_file("free_buffer", 0200, d_tracer,
9079 tr, &tracing_free_buffer_fops);
9080
9081 trace_create_file("trace_marker", 0220, d_tracer,
9082 tr, &tracing_mark_fops);
9083
9084 file = __find_event_file(tr, "ftrace", "print");
9085 if (file && file->dir)
9086 trace_create_file("trigger", 0644, file->dir, file,
9087 &event_trigger_fops);
9088 tr->trace_marker_file = file;
9089
9090 trace_create_file("trace_marker_raw", 0220, d_tracer,
9091 tr, &tracing_mark_raw_fops);
9092
9093 trace_create_file("trace_clock", 0644, d_tracer, tr,
9094 &trace_clock_fops);
9095
9096 trace_create_file("tracing_on", 0644, d_tracer,
9097 tr, &rb_simple_fops);
9098
9099 trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9100 &trace_time_stamp_mode_fops);
9101
9102 tr->buffer_percent = 50;
9103
9104 trace_create_file("buffer_percent", 0444, d_tracer,
9105 tr, &buffer_percent_fops);
9106
9107 create_trace_options_dir(tr);
9108
9109 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9110 trace_create_maxlat_file(tr, d_tracer);
9111 #endif
9112
9113 if (ftrace_create_function_files(tr, d_tracer))
9114 MEM_FAIL(1, "Could not allocate function filter files");
9115
9116 #ifdef CONFIG_TRACER_SNAPSHOT
9117 trace_create_file("snapshot", 0644, d_tracer,
9118 tr, &snapshot_fops);
9119 #endif
9120
9121 trace_create_file("error_log", 0644, d_tracer,
9122 tr, &tracing_err_log_fops);
9123
9124 for_each_tracing_cpu(cpu)
9125 tracing_init_tracefs_percpu(tr, cpu);
9126
9127 ftrace_init_tracefs(tr, d_tracer);
9128 }
9129
9130 #ifndef CONFIG_TRACEFS_DISABLE_AUTOMOUNT
9131 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9132 {
9133 struct vfsmount *mnt;
9134 struct file_system_type *type;
9135
9136 /*
9137 * To maintain backward compatibility for tools that mount
9138 * debugfs to get to the tracing facility, tracefs is automatically
9139 * mounted to the debugfs/tracing directory.
9140 */
9141 type = get_fs_type("tracefs");
9142 if (!type)
9143 return NULL;
9144 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9145 put_filesystem(type);
9146 if (IS_ERR(mnt))
9147 return NULL;
9148 mntget(mnt);
9149
9150 return mnt;
9151 }
9152 #endif
9153
9154 /**
9155 * tracing_init_dentry - initialize top level trace array
9156 *
9157 * This is called when creating files or directories in the tracing
9158 * directory. It is called via fs_initcall() by any of the boot up code
9159 * and expects to return the dentry of the top level tracing directory.
9160 */
9161 int tracing_init_dentry(void)
9162 {
9163 struct trace_array *tr = &global_trace;
9164
9165 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9166 pr_warn("Tracing disabled due to lockdown\n");
9167 return -EPERM;
9168 }
9169
9170 /* The top level trace array uses NULL as parent */
9171 if (tr->dir)
9172 return 0;
9173
9174 if (WARN_ON(!tracefs_initialized()))
9175 return -ENODEV;
9176
9177 #ifndef CONFIG_TRACEFS_DISABLE_AUTOMOUNT
9178 /*
9179 * As there may still be users that expect the tracing
9180 * files to exist in debugfs/tracing, we must automount
9181 * the tracefs file system there, so older tools still
9182 * work with the newer kernel.
9183 */
9184 tr->dir = debugfs_create_automount("tracing", NULL,
9185 trace_automount, NULL);
9186 #else
9187 tr->dir = ERR_PTR(-ENODEV);
9188 #endif
9189
9190 return 0;
9191 }
9192
9193 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9194 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9195
9196 static void __init trace_eval_init(void)
9197 {
9198 int len;
9199
9200 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9201 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9202 }
9203
9204 #ifdef CONFIG_MODULES
9205 static void trace_module_add_evals(struct module *mod)
9206 {
9207 if (!mod->num_trace_evals)
9208 return;
9209
9210 /*
9211 * Modules with bad taint do not have events created, do
9212 * not bother with enums either.
9213 */
9214 if (trace_module_has_bad_taint(mod))
9215 return;
9216
9217 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9218 }
9219
9220 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9221 static void trace_module_remove_evals(struct module *mod)
9222 {
9223 union trace_eval_map_item *map;
9224 union trace_eval_map_item **last = &trace_eval_maps;
9225
9226 if (!mod->num_trace_evals)
9227 return;
9228
9229 mutex_lock(&trace_eval_mutex);
9230
9231 map = trace_eval_maps;
9232
9233 while (map) {
9234 if (map->head.mod == mod)
9235 break;
9236 map = trace_eval_jmp_to_tail(map);
9237 last = &map->tail.next;
9238 map = map->tail.next;
9239 }
9240 if (!map)
9241 goto out;
9242
9243 *last = trace_eval_jmp_to_tail(map)->tail.next;
9244 kfree(map);
9245 out:
9246 mutex_unlock(&trace_eval_mutex);
9247 }
9248 #else
9249 static inline void trace_module_remove_evals(struct module *mod) { }
9250 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9251
9252 static int trace_module_notify(struct notifier_block *self,
9253 unsigned long val, void *data)
9254 {
9255 struct module *mod = data;
9256
9257 switch (val) {
9258 case MODULE_STATE_COMING:
9259 trace_module_add_evals(mod);
9260 break;
9261 case MODULE_STATE_GOING:
9262 trace_module_remove_evals(mod);
9263 break;
9264 }
9265
9266 return NOTIFY_OK;
9267 }
9268
9269 static struct notifier_block trace_module_nb = {
9270 .notifier_call = trace_module_notify,
9271 .priority = 0,
9272 };
9273 #endif /* CONFIG_MODULES */

static __init int tracer_init_tracefs(void)
{
	int ret;

	trace_access_lock_init();

	ret = tracing_init_dentry();
	if (ret)
		return 0;

	event_trace_init();

	init_tracer_tracefs(&global_trace, NULL);
	ftrace_init_tracefs_toplevel(&global_trace, NULL);

	trace_create_file("tracing_thresh", 0644, NULL,
			&global_trace, &tracing_thresh_fops);

	trace_create_file("README", 0444, NULL,
			NULL, &tracing_readme_fops);

	trace_create_file("saved_cmdlines", 0444, NULL,
			NULL, &tracing_saved_cmdlines_fops);

	trace_create_file("saved_cmdlines_size", 0644, NULL,
			  NULL, &tracing_saved_cmdlines_size_fops);

	trace_create_file("saved_tgids", 0444, NULL,
			NULL, &tracing_saved_tgids_fops);

	trace_eval_init();

	trace_create_eval_file(NULL);

#ifdef CONFIG_MODULES
	register_module_notifier(&trace_module_nb);
#endif

#ifdef CONFIG_DYNAMIC_FTRACE
	trace_create_file("dyn_ftrace_total_info", 0444, NULL,
			NULL, &tracing_dyn_info_fops);
#endif

	create_trace_instances(NULL);

	update_tracer_options(&global_trace);

	return 0;
}
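
/*
 * Example (illustrative): the files created above appear at the top of the
 * tracefs mount, normally /sys/kernel/tracing (and, via the automount
 * earlier in this file, /sys/kernel/debug/tracing), e.g.:
 *
 *	# cat /sys/kernel/tracing/README
 *	# echo 256 > /sys/kernel/tracing/saved_cmdlines_size
 */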

static int trace_panic_handler(struct notifier_block *this,
			       unsigned long event, void *unused)
{
	bool ftrace_check = false;

	trace_android_vh_ftrace_oops_enter(&ftrace_check);

	if (ftrace_check)
		return NOTIFY_OK;

	if (ftrace_dump_on_oops)
		ftrace_dump(ftrace_dump_on_oops);

	trace_android_vh_ftrace_oops_exit(&ftrace_check);
	return NOTIFY_OK;
}

static struct notifier_block trace_panic_notifier = {
	.notifier_call = trace_panic_handler,
	.next = NULL,
	.priority = 150 /* priority: INT_MAX >= x >= 0 */
};
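
/*
 * Example (illustrative): the panic/die handlers in this file only dump the
 * ring buffer when ftrace_dump_on_oops is set, e.g. via the kernel command
 * line or the sysctl:
 *
 *	ftrace_dump_on_oops		boot parameter; "=orig_cpu" limits the
 *					dump to the CPU that triggered the oops
 *	sysctl kernel.ftrace_dump_on_oops=1
 */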

static int trace_die_handler(struct notifier_block *self,
			     unsigned long val,
			     void *data)
{
	bool ftrace_check = false;

	trace_android_vh_ftrace_oops_enter(&ftrace_check);

	if (ftrace_check)
		return NOTIFY_OK;

	switch (val) {
	case DIE_OOPS:
		if (ftrace_dump_on_oops)
			ftrace_dump(ftrace_dump_on_oops);
		break;
	default:
		break;
	}

	trace_android_vh_ftrace_oops_exit(&ftrace_check);
	return NOTIFY_OK;
}

static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_handler,
	.priority = 200
};

/*
 * printk limits a single message to 1024 bytes; we really don't need it
 * that big here. Nothing should be printing 1000 characters anyway.
 */
#define TRACE_MAX_PRINT		1000

/*
 * Define here KERN_TRACE so that we have one place to modify
 * it if we decide to change what log level the ftrace dump
 * should be at.
 */
#define KERN_TRACE		KERN_EMERG

void
trace_printk_seq(struct trace_seq *s)
{
	bool dump_printk = true;

	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code. Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* should be zero terminated, but we are paranoid. */
	s->buffer[s->seq.len] = 0;

	trace_android_vh_ftrace_dump_buffer(s, &dump_printk);
	if (dump_printk)
		printk(KERN_TRACE "%s", s->buffer);

	trace_seq_init(s);
}

void trace_init_global_iter(struct trace_iterator *iter)
{
	iter->tr = &global_trace;
	iter->trace = iter->tr->current_trace;
	iter->cpu_file = RING_BUFFER_ALL_CPUS;
	iter->array_buffer = &global_trace.array_buffer;

	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
}

void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	static atomic_t dump_running;
	struct trace_array *tr = &global_trace;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0, cpu;
	bool ftrace_check = false;
	unsigned long size;

	/* Only allow one dump user at a time. */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracing_off();

	local_irq_save(flags);
	printk_nmi_direct_enter();

	/* Simulate the iterator */
	trace_init_global_iter(&iter);
	/* Can not use kmalloc for iter.temp */
	iter.temp = static_temp_buf;
	iter.temp_size = STATIC_TEMP_BUF_SIZE;

	for_each_tracing_cpu(cpu) {
		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
		size = ring_buffer_size(iter.array_buffer->buffer, cpu);
		trace_android_vh_ftrace_size_check(size, &ftrace_check);
	}

	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;

	if (ftrace_check)
		goto out_enable;

	switch (oops_dump_mode) {
	case DUMP_ALL:
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
		break;
	case DUMP_ORIG:
		iter.cpu_file = raw_smp_processor_id();
		break;
	case DUMP_NONE:
		goto out_enable;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
	}

	printk(KERN_TRACE "Dumping ftrace buffer:\n");

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUS to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill in all that we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {
		ftrace_check = true;

		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		trace_android_vh_ftrace_format_check(&ftrace_check);
		if (ftrace_check)
			iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);
		}
		touch_nmi_watchdog();

		trace_printk_seq(&iter.seq);
	}

	if (!cnt)
		printk(KERN_TRACE "   (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

 out_enable:
	tr->trace_flags |= old_userobj;

	for_each_tracing_cpu(cpu) {
		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}
	atomic_dec(&dump_running);
	printk_nmi_direct_exit();
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ftrace_dump);
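
/*
 * Example (illustrative): ftrace_dump() is exported and can be called from
 * module code on a fatal condition to spill whatever the ring buffer has
 * collected so far to the console:
 *
 *	if (fatal_error)
 *		ftrace_dump(DUMP_ALL);
 *
 * DUMP_ORIG restricts the dump to the current CPU, DUMP_NONE skips it.
 * Tracing is switched off by the dump; re-enable it afterwards with
 * "echo 1 > tracing_on" if needed.
 */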

int trace_run_command(const char *buf, int (*createfn)(int, char **))
{
	char **argv;
	int argc, ret;

	argc = 0;
	ret = 0;
	argv = argv_split(GFP_KERNEL, buf, &argc);
	if (!argv)
		return -ENOMEM;

	if (argc)
		ret = createfn(argc, argv);

	argv_free(argv);

	return ret;
}
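
/*
 * Example (illustrative): for an input line such as
 *
 *	"p:myprobe do_sys_open"
 *
 * argv_split() yields argc == 2 with argv = { "p:myprobe", "do_sys_open" },
 * which is handed to the caller's createfn (e.g. the kprobe_events parser).
 * An empty or all-whitespace line yields argc == 0 and is skipped.
 */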

#define WRITE_BUFSIZE  4096

ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(int, char **))
{
	char *kbuf, *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	while (done < count) {
		size = count - done;

		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size)) {
			ret = -EFAULT;
			goto out;
		}
		kbuf[size] = '\0';
		buf = kbuf;
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				size = tmp - buf + 1;
			} else {
				size = strlen(buf);
				if (done + size < count) {
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					ret = -EINVAL;
					goto out;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');

			if (tmp)
				*tmp = '\0';

			ret = trace_run_command(buf, createfn);
			if (ret)
				goto out;
			buf += size;

		} while (done < count);
	}
	ret = done;

out:
	kfree(kbuf);

	return ret;
}
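
/*
 * Example (illustrative): this is the write path behind files such as
 * kprobe_events and dynamic_events.  The user buffer is split on '\n',
 * anything after '#' on a line is dropped as a comment, and each remaining
 * line is passed to createfn, e.g.:
 *
 *	# echo 'p:open do_sys_open   # probe sys_open entry' > kprobe_events
 */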

__init static int tracer_alloc_buffers(void)
{
	int ring_buf_size;
	int ret = -ENOMEM;


	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		goto out;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callback allocates some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_savedcmd;
	}

	if (global_trace.buffer_disabled)
		tracing_off();

	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	ftrace_init_global_array_ops(&global_trace);

	init_trace_flags_index(&global_trace);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

out_free_savedcmd:
	free_saved_cmdlines_buffer(savedcmd);
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
out:
	return ret;
}
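
/*
 * Illustrative note: at this point the global ring buffer has only been
 * allocated at its minimum size (ring_buf_size of 1) unless
 * ring_buffer_expanded was already set.  It is expanded to the full
 * trace_buf_size (settable via the "trace_buf_size=" boot parameter or the
 * per-instance buffer_size_kb file) the first time tracing is actually
 * used, so idle systems do not pay for buffers they never fill.
 */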

void __init early_trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter =
			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
		if (MEM_FAIL(!tracepoint_print_iter,
			     "Failed to allocate trace iterator\n"))
			tracepoint_printk = 0;
		else
			static_key_enable(&tracepoint_printk_key.key);
	}
	tracer_alloc_buffers();

	init_events();
}

void __init trace_init(void)
{
	trace_event_init();
}

__init static int clear_boot_tracer(void)
{
	/*
	 * The default tracer name set up at boot lives in an init section
	 * that is freed after boot. This function runs at late_initcall_sync
	 * time: if the boot tracer was never registered by then, clear the
	 * pointer so that a later registration cannot compare against a
	 * buffer that is about to be freed.
	 */
	if (!default_bootup_tracer)
		return 0;

	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
	       default_bootup_tracer);
	default_bootup_tracer = NULL;

	return 0;
}

fs_initcall(tracer_init_tracefs);
late_initcall_sync(clear_boot_tracer);

#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
__init static int tracing_set_default_clock(void)
{
	/* sched_clock_stable() is determined in late_initcall */
	if (!trace_boot_clock && !sched_clock_stable()) {
		if (security_locked_down(LOCKDOWN_TRACEFS)) {
			pr_warn("Can not set tracing clock due to lockdown\n");
			return -EPERM;
		}

		printk(KERN_WARNING
		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
		       "If you want to keep using the local clock, then add:\n"
		       "  \"trace_clock=local\"\n"
		       "on the kernel command line\n");
		tracing_set_clock(&global_trace, "global");
	}

	return 0;
}
late_initcall_sync(tracing_set_default_clock);
#endif