1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/kmemleak.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 #include <trace/hooks/ftrace_dump.h>
53
54 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
55
56 #include "trace.h"
57 #include "trace_output.h"
58
59 /*
60 * On boot up, the ring buffer is set to the minimum size, so that
61 * we do not waste memory on systems that are not using tracing.
62 */
63 bool ring_buffer_expanded;
64
65 #ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * We need to change this state when a selftest is running.
 * A selftest will look into the ring-buffer to count the
 * entries inserted during the selftest, although some concurrent
 * insertions into the ring-buffer, such as trace_printk, could have
 * occurred at the same time, giving false positive or negative results.
 */
73 static bool __read_mostly tracing_selftest_running;
74
75 /*
76 * If boot-time tracing including tracers/events via kernel cmdline
77 * is running, we do not want to run SELFTEST.
78 */
79 bool __read_mostly tracing_selftest_disabled;
80
disable_tracing_selftest(const char * reason)81 void __init disable_tracing_selftest(const char *reason)
82 {
83 if (!tracing_selftest_disabled) {
84 tracing_selftest_disabled = true;
85 pr_info("Ftrace startup test is disabled due to %s\n", reason);
86 }
87 }
88 #else
89 #define tracing_selftest_running 0
90 #define tracing_selftest_disabled 0
91 #endif
92
93 /* Pipe tracepoints to printk */
94 static struct trace_iterator *tracepoint_print_iter;
95 int tracepoint_printk;
96 static bool tracepoint_printk_stop_on_boot __initdata;
97 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
98
99 /* For tracers that don't implement custom flags */
100 static struct tracer_opt dummy_tracer_opt[] = {
101 { }
102 };
103
104 static int
dummy_set_flag(struct trace_array * tr,u32 old_flags,u32 bit,int set)105 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
106 {
107 return 0;
108 }
109
110 /*
111 * To prevent the comm cache from being overwritten when no
112 * tracing is active, only save the comm when a trace event
113 * occurred.
114 */
115 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
116
117 /*
118 * Kill all tracing for good (never come back).
119 * It is initialized to 1 but will turn to zero if the initialization
120 * of the tracer is successful. But that is the only place that sets
121 * this back to zero.
122 */
123 static int tracing_disabled = 1;
124
125 cpumask_var_t __read_mostly tracing_buffer_mask;
126
/*
 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 *
 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 * is set, then ftrace_dump is called. This will output the contents
 * of the ftrace buffers to the console. This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
 *
 * It is off by default, but you can enable it by either specifying
 * "ftrace_dump_on_oops" in the kernel command line, or setting
 * /proc/sys/kernel/ftrace_dump_on_oops
 * Set 1 if you want to dump buffers of all CPUs
 * Set 2 if you want to dump the buffer of the CPU that triggered oops
 */
142
143 enum ftrace_dump_mode ftrace_dump_on_oops;
144
145 /* When set, tracing will stop when a WARN*() is hit */
146 int __disable_trace_on_warning;
147
148 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
149 /* Map of enums to their values, for "eval_map" file */
/* Beginning item of a saved eval-map array. */
150 struct trace_eval_map_head {
151 struct module *mod; /* module the maps belong to, if not built in */
152 unsigned long length; /* count of the saved maps */
153 };
154
155 union trace_eval_map_item;
156
/* Ending item of a saved eval-map array; links to the next array. */
157 struct trace_eval_map_tail {
158 /*
159 * "next" is the first member; "end" follows it and points to NULL
160 * as it must be different than "mod" or "eval_string"
161 */
162 union trace_eval_map_item *next;
163 const char *end; /* points to NULL */
164 };
165
166 static DEFINE_MUTEX(trace_eval_mutex);
167
168 /*
169 * The trace_eval_maps are saved in an array with two extra elements,
170 * one at the beginning, and one at the end. The beginning item contains
171 * the count of the saved maps (head.length), and the module they
172 * belong to if not built in (head.mod). The ending item contains a
173 * pointer to the next array of saved eval_map items.
174 */
/* One slot of a saved eval-map array; which member is valid depends on position. */
175 union trace_eval_map_item {
176 struct trace_eval_map map; /* a saved map entry */
177 struct trace_eval_map_head head; /* beginning item: count and owning module */
178 struct trace_eval_map_tail tail; /* ending item: pointer to the next array */
179 };
180
181 static union trace_eval_map_item *trace_eval_maps;
182 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
183
184 int tracing_set_tracer(struct trace_array *tr, const char *buf);
185 static void ftrace_trace_userstack(struct trace_array *tr,
186 struct trace_buffer *buffer,
187 unsigned int trace_ctx);
188
/* Size of the fixed buffers that hold boot-time tracer/option/clock strings */
189 #define MAX_TRACER_SIZE 100
/* Copy of the "ftrace=" boot parameter value */
190 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
/* Points at bootup_tracer_buf once "ftrace=" has been parsed */
191 static char *default_bootup_tracer;
192
/* Set by a bare "alloc_snapshot"/"ftrace_boot_snapshot" (no "=value") */
193 static bool allocate_snapshot;
/* Set whenever "ftrace_boot_snapshot" is given */
194 static bool snapshot_at_boot;
195
/* Tab-separated "trace_instance=" values and the offset of the next free byte */
196 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
197 static int boot_instance_index;
198
/* Tab-separated "alloc_snapshot=" values and the offset of the next free byte */
199 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
200 static int boot_snapshot_index;
201
set_cmdline_ftrace(char * str)202 static int __init set_cmdline_ftrace(char *str)
203 {
204 strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
205 default_bootup_tracer = bootup_tracer_buf;
206 /* We are using ftrace early, expand it */
207 ring_buffer_expanded = true;
208 return 1;
209 }
210 __setup("ftrace=", set_cmdline_ftrace);
211
set_ftrace_dump_on_oops(char * str)212 static int __init set_ftrace_dump_on_oops(char *str)
213 {
214 if (*str++ != '=' || !*str || !strcmp("1", str)) {
215 ftrace_dump_on_oops = DUMP_ALL;
216 return 1;
217 }
218
219 if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
220 ftrace_dump_on_oops = DUMP_ORIG;
221 return 1;
222 }
223
224 return 0;
225 }
226 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
227
stop_trace_on_warning(char * str)228 static int __init stop_trace_on_warning(char *str)
229 {
230 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
231 __disable_trace_on_warning = 1;
232 return 1;
233 }
234 __setup("traceoff_on_warning", stop_trace_on_warning);
235
boot_alloc_snapshot(char * str)236 static int __init boot_alloc_snapshot(char *str)
237 {
238 char *slot = boot_snapshot_info + boot_snapshot_index;
239 int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
240 int ret;
241
242 if (str[0] == '=') {
243 str++;
244 if (strlen(str) >= left)
245 return -1;
246
247 ret = snprintf(slot, left, "%s\t", str);
248 boot_snapshot_index += ret;
249 } else {
250 allocate_snapshot = true;
251 /* We also need the main ring buffer expanded */
252 ring_buffer_expanded = true;
253 }
254 return 1;
255 }
256 __setup("alloc_snapshot", boot_alloc_snapshot);
257
258
boot_snapshot(char * str)259 static int __init boot_snapshot(char *str)
260 {
261 snapshot_at_boot = true;
262 boot_alloc_snapshot(str);
263 return 1;
264 }
265 __setup("ftrace_boot_snapshot", boot_snapshot);
266
267
boot_instance(char * str)268 static int __init boot_instance(char *str)
269 {
270 char *slot = boot_instance_info + boot_instance_index;
271 int left = sizeof(boot_instance_info) - boot_instance_index;
272 int ret;
273
274 if (strlen(str) >= left)
275 return -1;
276
277 ret = snprintf(slot, left, "%s\t", str);
278 boot_instance_index += ret;
279
280 return 1;
281 }
282 __setup("trace_instance=", boot_instance);
283
284
285 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
286
set_trace_boot_options(char * str)287 static int __init set_trace_boot_options(char *str)
288 {
289 strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
290 return 1;
291 }
292 __setup("trace_options=", set_trace_boot_options);
293
294 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
295 static char *trace_boot_clock __initdata;
296
set_trace_boot_clock(char * str)297 static int __init set_trace_boot_clock(char *str)
298 {
299 strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
300 trace_boot_clock = trace_boot_clock_buf;
301 return 1;
302 }
303 __setup("trace_clock=", set_trace_boot_clock);
304
set_tracepoint_printk(char * str)305 static int __init set_tracepoint_printk(char *str)
306 {
307 /* Ignore the "tp_printk_stop_on_boot" param */
308 if (*str == '_')
309 return 0;
310
311 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
312 tracepoint_printk = 1;
313 return 1;
314 }
315 __setup("tp_printk", set_tracepoint_printk);
316
set_tracepoint_printk_stop(char * str)317 static int __init set_tracepoint_printk_stop(char *str)
318 {
319 tracepoint_printk_stop_on_boot = true;
320 return 1;
321 }
322 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
323
/*
 * Convert nanoseconds to microseconds: 500 is added before the
 * division by 1000, so the result is rounded to the nearest
 * microsecond.
 */
unsigned long long ns2usecs(u64 nsec)
{
	u64 usecs = nsec + 500;

	/* do_div() divides @usecs in place */
	do_div(usecs, 1000);

	return usecs;
}
330
331 static void
trace_process_export(struct trace_export * export,struct ring_buffer_event * event,int flag)332 trace_process_export(struct trace_export *export,
333 struct ring_buffer_event *event, int flag)
334 {
335 struct trace_entry *entry;
336 unsigned int size = 0;
337
338 if (export->flags & flag) {
339 entry = ring_buffer_event_data(event);
340 size = ring_buffer_event_length(event);
341 export->write(export, entry, size);
342 }
343 }
344
/* Held by register/unregister_ftrace_export() while the list is changed */
345 static DEFINE_MUTEX(ftrace_export_lock);
346
/* Head of the list of registered exports; walked by ftrace_exports() */
347 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
348
/* Incremented/decremented per export according to its TRACE_EXPORT_* flags */
349 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
350 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
351 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
352
ftrace_exports_enable(struct trace_export * export)353 static inline void ftrace_exports_enable(struct trace_export *export)
354 {
355 if (export->flags & TRACE_EXPORT_FUNCTION)
356 static_branch_inc(&trace_function_exports_enabled);
357
358 if (export->flags & TRACE_EXPORT_EVENT)
359 static_branch_inc(&trace_event_exports_enabled);
360
361 if (export->flags & TRACE_EXPORT_MARKER)
362 static_branch_inc(&trace_marker_exports_enabled);
363 }
364
ftrace_exports_disable(struct trace_export * export)365 static inline void ftrace_exports_disable(struct trace_export *export)
366 {
367 if (export->flags & TRACE_EXPORT_FUNCTION)
368 static_branch_dec(&trace_function_exports_enabled);
369
370 if (export->flags & TRACE_EXPORT_EVENT)
371 static_branch_dec(&trace_event_exports_enabled);
372
373 if (export->flags & TRACE_EXPORT_MARKER)
374 static_branch_dec(&trace_marker_exports_enabled);
375 }
376
ftrace_exports(struct ring_buffer_event * event,int flag)377 static void ftrace_exports(struct ring_buffer_event *event, int flag)
378 {
379 struct trace_export *export;
380
381 preempt_disable_notrace();
382
383 export = rcu_dereference_raw_check(ftrace_exports_list);
384 while (export) {
385 trace_process_export(export, event, flag);
386 export = rcu_dereference_raw_check(export->next);
387 }
388
389 preempt_enable_notrace();
390 }
391
392 static inline void
add_trace_export(struct trace_export ** list,struct trace_export * export)393 add_trace_export(struct trace_export **list, struct trace_export *export)
394 {
395 rcu_assign_pointer(export->next, *list);
396 /*
397 * We are entering export into the list but another
398 * CPU might be walking that list. We need to make sure
399 * the export->next pointer is valid before another CPU sees
400 * the export pointer included into the list.
401 */
402 rcu_assign_pointer(*list, export);
403 }
404
405 static inline int
rm_trace_export(struct trace_export ** list,struct trace_export * export)406 rm_trace_export(struct trace_export **list, struct trace_export *export)
407 {
408 struct trace_export **p;
409
410 for (p = list; *p != NULL; p = &(*p)->next)
411 if (*p == export)
412 break;
413
414 if (*p != export)
415 return -1;
416
417 rcu_assign_pointer(*p, (*p)->next);
418
419 return 0;
420 }
421
/*
 * Register @export on @list: bump the static keys matching its flags
 * first, then insert it into the list.
 */
static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);
	add_trace_export(list, export);
}
429
/*
 * Unlink @export from @list and, only if it really was on the list,
 * drop the static-key references that ftrace_exports_enable() took
 * for it.
 *
 * Returns 0 on success, -1 if @export was not found on @list.
 */
static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret;

	ret = rm_trace_export(list, export);

	/*
	 * An export that is not on the list never incremented the keys;
	 * decrementing them anyway would leave the counts unbalanced.
	 */
	if (!ret)
		ftrace_exports_disable(export);

	return ret;
}
440
register_ftrace_export(struct trace_export * export)441 int register_ftrace_export(struct trace_export *export)
442 {
443 if (WARN_ON_ONCE(!export->write))
444 return -1;
445
446 mutex_lock(&ftrace_export_lock);
447
448 add_ftrace_export(&ftrace_exports_list, export);
449
450 mutex_unlock(&ftrace_export_lock);
451
452 return 0;
453 }
454 EXPORT_SYMBOL_GPL(register_ftrace_export);
455
unregister_ftrace_export(struct trace_export * export)456 int unregister_ftrace_export(struct trace_export *export)
457 {
458 int ret;
459
460 mutex_lock(&ftrace_export_lock);
461
462 ret = rm_ftrace_export(&ftrace_exports_list, export);
463
464 mutex_unlock(&ftrace_export_lock);
465
466 return ret;
467 }
468 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
469
470 /* trace_flags holds trace_options default values */
471 #define TRACE_DEFAULT_FLAGS \
472 (FUNCTION_DEFAULT_FLAGS | \
473 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
474 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
475 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
476 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
477 TRACE_ITER_HASH_PTR)
478
479 /* trace_options that are only supported by global_trace */
480 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
481 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
482
483 /* trace_flags that are default zero for instances */
484 #define ZEROED_TRACE_FLAGS \
485 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
486
487 /*
488 * The global_trace is the descriptor that holds the top-level tracing
489 * buffers for the live tracing.
490 */
491 static struct trace_array global_trace = {
492 .trace_flags = TRACE_DEFAULT_FLAGS,
493 };
494
495 LIST_HEAD(ftrace_trace_arrays);
496
trace_array_get(struct trace_array * this_tr)497 int trace_array_get(struct trace_array *this_tr)
498 {
499 struct trace_array *tr;
500 int ret = -ENODEV;
501
502 mutex_lock(&trace_types_lock);
503 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
504 if (tr == this_tr) {
505 tr->ref++;
506 ret = 0;
507 break;
508 }
509 }
510 mutex_unlock(&trace_types_lock);
511
512 return ret;
513 }
514
__trace_array_put(struct trace_array * this_tr)515 static void __trace_array_put(struct trace_array *this_tr)
516 {
517 WARN_ON(!this_tr->ref);
518 this_tr->ref--;
519 }
520
521 /**
522 * trace_array_put - Decrement the reference counter for this trace array.
523 * @this_tr : pointer to the trace array
524 *
525 * NOTE: Use this when we no longer need the trace array returned by
526 * trace_array_get_by_name(). This ensures the trace array can be later
527 * destroyed.
528 *
529 */
trace_array_put(struct trace_array * this_tr)530 void trace_array_put(struct trace_array *this_tr)
531 {
532 if (!this_tr)
533 return;
534
535 mutex_lock(&trace_types_lock);
536 __trace_array_put(this_tr);
537 mutex_unlock(&trace_types_lock);
538 }
539 EXPORT_SYMBOL_GPL(trace_array_put);
540
tracing_check_open_get_tr(struct trace_array * tr)541 int tracing_check_open_get_tr(struct trace_array *tr)
542 {
543 int ret;
544
545 ret = security_locked_down(LOCKDOWN_TRACEFS);
546 if (ret)
547 return ret;
548
549 if (tracing_disabled)
550 return -ENODEV;
551
552 if (tr && trace_array_get(tr) < 0)
553 return -ENODEV;
554
555 return 0;
556 }
557
call_filter_check_discard(struct trace_event_call * call,void * rec,struct trace_buffer * buffer,struct ring_buffer_event * event)558 int call_filter_check_discard(struct trace_event_call *call, void *rec,
559 struct trace_buffer *buffer,
560 struct ring_buffer_event *event)
561 {
562 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
563 !filter_match_preds(call->filter, rec)) {
564 __trace_event_discard_commit(buffer, event);
565 return 1;
566 }
567
568 return 0;
569 }
570
571 /**
572 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
573 * @filtered_pids: The list of pids to check
574 * @search_pid: The PID to find in @filtered_pids
575 *
576 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
577 */
578 bool
trace_find_filtered_pid(struct trace_pid_list * filtered_pids,pid_t search_pid)579 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
580 {
581 return trace_pid_list_is_set(filtered_pids, search_pid);
582 }
583
584 /**
585 * trace_ignore_this_task - should a task be ignored for tracing
586 * @filtered_pids: The list of pids to check
587 * @filtered_no_pids: The list of pids not to be traced
588 * @task: The task that should be ignored if not filtered
589 *
590 * Checks if @task should be traced or not from @filtered_pids.
591 * Returns true if @task should *NOT* be traced.
592 * Returns false if @task should be traced.
593 */
594 bool
trace_ignore_this_task(struct trace_pid_list * filtered_pids,struct trace_pid_list * filtered_no_pids,struct task_struct * task)595 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
596 struct trace_pid_list *filtered_no_pids,
597 struct task_struct *task)
598 {
599 /*
600 * If filtered_no_pids is not empty, and the task's pid is listed
601 * in filtered_no_pids, then return true.
602 * Otherwise, if filtered_pids is empty, that means we can
603 * trace all tasks. If it has content, then only trace pids
604 * within filtered_pids.
605 */
606
607 return (filtered_pids &&
608 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
609 (filtered_no_pids &&
610 trace_find_filtered_pid(filtered_no_pids, task->pid));
611 }
612
613 /**
614 * trace_filter_add_remove_task - Add or remove a task from a pid_list
615 * @pid_list: The list to modify
616 * @self: The current task for fork or NULL for exit
617 * @task: The task to add or remove
618 *
619 * If adding a task, if @self is defined, the task is only added if @self
620 * is also included in @pid_list. This happens on fork and tasks should
621 * only be added when the parent is listed. If @self is NULL, then the
622 * @task pid will be removed from the list, which would happen on exit
623 * of a task.
624 */
trace_filter_add_remove_task(struct trace_pid_list * pid_list,struct task_struct * self,struct task_struct * task)625 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
626 struct task_struct *self,
627 struct task_struct *task)
628 {
629 if (!pid_list)
630 return;
631
632 /* For forks, we only add if the forking task is listed */
633 if (self) {
634 if (!trace_find_filtered_pid(pid_list, self->pid))
635 return;
636 }
637
638 /* "self" is set for forks, and NULL for exits */
639 if (self)
640 trace_pid_list_set(pid_list, task->pid);
641 else
642 trace_pid_list_clear(pid_list, task->pid);
643 }
644
645 /**
646 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
647 * @pid_list: The pid list to show
648 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
649 * @pos: The position of the file
650 *
651 * This is used by the seq_file "next" operation to iterate the pids
652 * listed in a trace_pid_list structure.
653 *
654 * Returns the pid+1 as we want to display pid of zero, but NULL would
655 * stop the iteration.
656 */
trace_pid_next(struct trace_pid_list * pid_list,void * v,loff_t * pos)657 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
658 {
659 long pid = (unsigned long)v;
660 unsigned int next;
661
662 (*pos)++;
663
664 /* pid already is +1 of the actual previous bit */
665 if (trace_pid_list_next(pid_list, pid, &next) < 0)
666 return NULL;
667
668 pid = next;
669
670 /* Return pid + 1 to allow zero to be represented */
671 return (void *)(pid + 1);
672 }
673
674 /**
675 * trace_pid_start - Used for seq_file to start reading pid lists
676 * @pid_list: The pid list to show
677 * @pos: The position of the file
678 *
679 * This is used by seq_file "start" operation to start the iteration
680 * of listing pids.
681 *
682 * Returns the pid+1 as we want to display pid of zero, but NULL would
683 * stop the iteration.
684 */
trace_pid_start(struct trace_pid_list * pid_list,loff_t * pos)685 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
686 {
687 unsigned long pid;
688 unsigned int first;
689 loff_t l = 0;
690
691 if (trace_pid_list_first(pid_list, &first) < 0)
692 return NULL;
693
694 pid = first;
695
696 /* Return pid + 1 so that zero can be the exit value */
697 for (pid++; pid && l < *pos;
698 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
699 ;
700 return (void *)pid;
701 }
702
/**
 * trace_pid_show - show the current pid in seq_file processing
 * @m: The seq_file structure to write into
 * @v: A void pointer of the pid (+1) value to display
 *
 * Can be directly used by seq_file operations to display the current
 * pid value. The +1 bias of @v is removed before printing.
 *
 * Always returns 0.
 */
int trace_pid_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%lu\n", (unsigned long)v - 1);

	return 0;
}
718
719 /* 128 should be much more than enough */
720 #define PID_BUF_SIZE 127
721
trace_pid_write(struct trace_pid_list * filtered_pids,struct trace_pid_list ** new_pid_list,const char __user * ubuf,size_t cnt)722 int trace_pid_write(struct trace_pid_list *filtered_pids,
723 struct trace_pid_list **new_pid_list,
724 const char __user *ubuf, size_t cnt)
725 {
726 struct trace_pid_list *pid_list;
727 struct trace_parser parser;
728 unsigned long val;
729 int nr_pids = 0;
730 ssize_t read = 0;
731 ssize_t ret;
732 loff_t pos;
733 pid_t pid;
734
735 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
736 return -ENOMEM;
737
738 /*
739 * Always recreate a new array. The write is an all or nothing
740 * operation. Always create a new array when adding new pids by
741 * the user. If the operation fails, then the current list is
742 * not modified.
743 */
744 pid_list = trace_pid_list_alloc();
745 if (!pid_list) {
746 trace_parser_put(&parser);
747 return -ENOMEM;
748 }
749
750 if (filtered_pids) {
751 /* copy the current bits to the new max */
752 ret = trace_pid_list_first(filtered_pids, &pid);
753 while (!ret) {
754 trace_pid_list_set(pid_list, pid);
755 ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
756 nr_pids++;
757 }
758 }
759
760 ret = 0;
761 while (cnt > 0) {
762
763 pos = 0;
764
765 ret = trace_get_user(&parser, ubuf, cnt, &pos);
766 if (ret < 0)
767 break;
768
769 read += ret;
770 ubuf += ret;
771 cnt -= ret;
772
773 if (!trace_parser_loaded(&parser))
774 break;
775
776 ret = -EINVAL;
777 if (kstrtoul(parser.buffer, 0, &val))
778 break;
779
780 pid = (pid_t)val;
781
782 if (trace_pid_list_set(pid_list, pid) < 0) {
783 ret = -1;
784 break;
785 }
786 nr_pids++;
787
788 trace_parser_clear(&parser);
789 ret = 0;
790 }
791 trace_parser_put(&parser);
792
793 if (ret < 0) {
794 trace_pid_list_free(pid_list);
795 return ret;
796 }
797
798 if (!nr_pids) {
799 /* Cleared the list of pids */
800 trace_pid_list_free(pid_list);
801 pid_list = NULL;
802 }
803
804 *new_pid_list = pid_list;
805
806 return read;
807 }
808
buffer_ftrace_now(struct array_buffer * buf,int cpu)809 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
810 {
811 u64 ts;
812
813 /* Early boot up does not have a buffer yet */
814 if (!buf->buffer)
815 return trace_clock_local();
816
817 ts = ring_buffer_time_stamp(buf->buffer);
818 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
819
820 return ts;
821 }
822
ftrace_now(int cpu)823 u64 ftrace_now(int cpu)
824 {
825 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
826 }
827
828 /**
829 * tracing_is_enabled - Show if global_trace has been enabled
830 *
831 * Shows if the global trace has been enabled or not. It uses the
832 * mirror flag "buffer_disabled" to be used in fast paths such as for
833 * the irqsoff tracer. But it may be inaccurate due to races. If you
834 * need to know the accurate state, use tracing_is_on() which is a little
835 * slower, but accurate.
836 */
tracing_is_enabled(void)837 int tracing_is_enabled(void)
838 {
839 /*
840 * For quick access (irqsoff uses this in fast path), just
841 * return the mirror variable of the state of the ring buffer.
842 * It's a little racy, but we don't really care.
843 */
844 smp_rmb();
845 return !global_trace.buffer_disabled;
846 }
847
/*
 * trace_buf_size is the size in bytes that is allocated
 * for a buffer. Note, the number of bytes is always rounded
 * to page size.
 *
 * This number is purposely set to a low number of 16384 entries.
 * If the dump on oops happens, it will be much appreciated
 * to not have to wait for all that output. Anyway, this is both
 * boot time and run time configurable.
 */
858 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
859
860 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
861
862 /* trace_types holds a link list of available tracers. */
863 static struct tracer *trace_types __read_mostly;
864
865 /*
866 * trace_types_lock is used to protect the trace_types list.
867 */
868 DEFINE_MUTEX(trace_types_lock);
869
/*
 * Serialize access to the ring buffer.
 *
 * The ring buffer serializes readers, but that is only low level
 * protection. The validity of the events (which are returned by
 * ring_buffer_peek() etc.) is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow another process
 * to consume these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *      (not reader page) in the ring buffer, and this page will be
 *      rewritten by the events producer.
 *   B) the page of the consumed events may become a page for splice_read,
 *      and this page will be returned to the system.
 *
 * These primitives allow multiple processes to access different cpu
 * ring buffers concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multiple read-only accesses are also serialized.
 */
891
892 #ifdef CONFIG_SMP
893 static DECLARE_RWSEM(all_cpu_access_lock);
894 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
895
trace_access_lock(int cpu)896 static inline void trace_access_lock(int cpu)
897 {
898 if (cpu == RING_BUFFER_ALL_CPUS) {
899 /* gain it for accessing the whole ring buffer. */
900 down_write(&all_cpu_access_lock);
901 } else {
902 /* gain it for accessing a cpu ring buffer. */
903
904 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
905 down_read(&all_cpu_access_lock);
906
907 /* Secondly block other access to this @cpu ring buffer. */
908 mutex_lock(&per_cpu(cpu_access_lock, cpu));
909 }
910 }
911
trace_access_unlock(int cpu)912 static inline void trace_access_unlock(int cpu)
913 {
914 if (cpu == RING_BUFFER_ALL_CPUS) {
915 up_write(&all_cpu_access_lock);
916 } else {
917 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
918 up_read(&all_cpu_access_lock);
919 }
920 }
921
trace_access_lock_init(void)922 static inline void trace_access_lock_init(void)
923 {
924 int cpu;
925
926 for_each_possible_cpu(cpu)
927 mutex_init(&per_cpu(cpu_access_lock, cpu));
928 }
929
930 #else
931
932 static DEFINE_MUTEX(access_lock);
933
trace_access_lock(int cpu)934 static inline void trace_access_lock(int cpu)
935 {
936 (void)cpu;
937 mutex_lock(&access_lock);
938 }
939
trace_access_unlock(int cpu)940 static inline void trace_access_unlock(int cpu)
941 {
942 (void)cpu;
943 mutex_unlock(&access_lock);
944 }
945
/* !CONFIG_SMP: access_lock is statically defined, nothing to set up. */
static inline void trace_access_lock_init(void)
{
}
949
950 #endif
951
952 #ifdef CONFIG_STACKTRACE
953 static void __ftrace_trace_stack(struct trace_buffer *buffer,
954 unsigned int trace_ctx,
955 int skip, struct pt_regs *regs);
956 static inline void ftrace_trace_stack(struct trace_array *tr,
957 struct trace_buffer *buffer,
958 unsigned int trace_ctx,
959 int skip, struct pt_regs *regs);
960
961 #else
/*
 * !CONFIG_STACKTRACE: recording a stack trace is a no-op. The
 * parameter lists match the prototypes of the CONFIG_STACKTRACE case,
 * including the unsigned int type of @trace_ctx.
 */
static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
					unsigned int trace_ctx,
					int skip, struct pt_regs *regs)
{
}

static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct trace_buffer *buffer,
				      unsigned int trace_ctx,
				      int skip, struct pt_regs *regs)
{
}
973
974 #endif
975
976 static __always_inline void
trace_event_setup(struct ring_buffer_event * event,int type,unsigned int trace_ctx)977 trace_event_setup(struct ring_buffer_event *event,
978 int type, unsigned int trace_ctx)
979 {
980 struct trace_entry *ent = ring_buffer_event_data(event);
981
982 tracing_generic_entry_update(ent, type, trace_ctx);
983 }
984
985 static __always_inline struct ring_buffer_event *
__trace_buffer_lock_reserve(struct trace_buffer * buffer,int type,unsigned long len,unsigned int trace_ctx)986 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
987 int type,
988 unsigned long len,
989 unsigned int trace_ctx)
990 {
991 struct ring_buffer_event *event;
992
993 event = ring_buffer_lock_reserve(buffer, len);
994 if (event != NULL)
995 trace_event_setup(event, type, trace_ctx);
996
997 return event;
998 }
999
tracer_tracing_on(struct trace_array * tr)1000 void tracer_tracing_on(struct trace_array *tr)
1001 {
1002 if (tr->array_buffer.buffer)
1003 ring_buffer_record_on(tr->array_buffer.buffer);
1004 /*
1005 * This flag is looked at when buffers haven't been allocated
1006 * yet, or by some tracers (like irqsoff), that just want to
1007 * know if the ring buffer has been disabled, but it can handle
1008 * races of where it gets disabled but we still do a record.
1009 * As the check is in the fast path of the tracers, it is more
1010 * important to be fast than accurate.
1011 */
1012 tr->buffer_disabled = 0;
1013 /* Make the flag seen by readers */
1014 smp_wmb();
1015 }
1016
1017 /**
1018 * tracing_on - enable tracing buffers
1019 *
1020 * This function enables tracing buffers that may have been
1021 * disabled with tracing_off.
1022 */
tracing_on(void)1023 void tracing_on(void)
1024 {
1025 tracer_tracing_on(&global_trace);
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_on);
1028
1029
1030 static __always_inline void
__buffer_unlock_commit(struct trace_buffer * buffer,struct ring_buffer_event * event)1031 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1032 {
1033 __this_cpu_write(trace_taskinfo_save, true);
1034
1035 /* If this is the temp buffer, we need to commit fully */
1036 if (this_cpu_read(trace_buffered_event) == event) {
1037 /* Length is in event->array[0] */
1038 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1039 /* Release the temp buffer */
1040 this_cpu_dec(trace_buffered_event_cnt);
1041 /* ring_buffer_unlock_commit() enables preemption */
1042 preempt_enable_notrace();
1043 } else
1044 ring_buffer_unlock_commit(buffer);
1045 }
1046
__trace_array_puts(struct trace_array * tr,unsigned long ip,const char * str,int size)1047 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1048 const char *str, int size)
1049 {
1050 struct ring_buffer_event *event;
1051 struct trace_buffer *buffer;
1052 struct print_entry *entry;
1053 unsigned int trace_ctx;
1054 int alloc;
1055
1056 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1057 return 0;
1058
1059 if (unlikely(tracing_selftest_running && tr == &global_trace))
1060 return 0;
1061
1062 if (unlikely(tracing_disabled))
1063 return 0;
1064
1065 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1066
1067 trace_ctx = tracing_gen_ctx();
1068 buffer = tr->array_buffer.buffer;
1069 ring_buffer_nest_start(buffer);
1070 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1071 trace_ctx);
1072 if (!event) {
1073 size = 0;
1074 goto out;
1075 }
1076
1077 entry = ring_buffer_event_data(event);
1078 entry->ip = ip;
1079
1080 memcpy(&entry->buf, str, size);
1081
1082 /* Add a newline if necessary */
1083 if (entry->buf[size - 1] != '\n') {
1084 entry->buf[size] = '\n';
1085 entry->buf[size + 1] = '\0';
1086 } else
1087 entry->buf[size] = '\0';
1088
1089 __buffer_unlock_commit(buffer, event);
1090 ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1091 out:
1092 ring_buffer_nest_end(buffer);
1093 return size;
1094 }
1095 EXPORT_SYMBOL_GPL(__trace_array_puts);
1096
1097 /**
1098 * __trace_puts - write a constant string into the trace buffer.
1099 * @ip: The address of the caller
1100 * @str: The constant string to write
1101 * @size: The size of the string.
1102 */
__trace_puts(unsigned long ip,const char * str,int size)1103 int __trace_puts(unsigned long ip, const char *str, int size)
1104 {
1105 return __trace_array_puts(&global_trace, ip, str, size);
1106 }
1107 EXPORT_SYMBOL_GPL(__trace_puts);
1108
1109 /**
1110 * __trace_bputs - write the pointer to a constant string into trace buffer
1111 * @ip: The address of the caller
1112 * @str: The constant string to write to the buffer to
1113 */
__trace_bputs(unsigned long ip,const char * str)1114 int __trace_bputs(unsigned long ip, const char *str)
1115 {
1116 struct ring_buffer_event *event;
1117 struct trace_buffer *buffer;
1118 struct bputs_entry *entry;
1119 unsigned int trace_ctx;
1120 int size = sizeof(struct bputs_entry);
1121 int ret = 0;
1122
1123 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1124 return 0;
1125
1126 if (unlikely(tracing_selftest_running || tracing_disabled))
1127 return 0;
1128
1129 trace_ctx = tracing_gen_ctx();
1130 buffer = global_trace.array_buffer.buffer;
1131
1132 ring_buffer_nest_start(buffer);
1133 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1134 trace_ctx);
1135 if (!event)
1136 goto out;
1137
1138 entry = ring_buffer_event_data(event);
1139 entry->ip = ip;
1140 entry->str = str;
1141
1142 __buffer_unlock_commit(buffer, event);
1143 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1144
1145 ret = 1;
1146 out:
1147 ring_buffer_nest_end(buffer);
1148 return ret;
1149 }
1150 EXPORT_SYMBOL_GPL(__trace_bputs);
1151
1152 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_instance_cond(struct trace_array * tr,void * cond_data)1153 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1154 void *cond_data)
1155 {
1156 struct tracer *tracer = tr->current_trace;
1157 unsigned long flags;
1158
1159 if (in_nmi()) {
1160 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1161 trace_array_puts(tr, "*** snapshot is being ignored ***\n");
1162 return;
1163 }
1164
1165 if (!tr->allocated_snapshot) {
1166 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1167 trace_array_puts(tr, "*** stopping trace here! ***\n");
1168 tracer_tracing_off(tr);
1169 return;
1170 }
1171
1172 /* Note, snapshot can not be used when the tracer uses it */
1173 if (tracer->use_max_tr) {
1174 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1175 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1176 return;
1177 }
1178
1179 local_irq_save(flags);
1180 update_max_tr(tr, current, smp_processor_id(), cond_data);
1181 local_irq_restore(flags);
1182 }
1183
tracing_snapshot_instance(struct trace_array * tr)1184 void tracing_snapshot_instance(struct trace_array *tr)
1185 {
1186 tracing_snapshot_instance_cond(tr, NULL);
1187 }
1188
1189 /**
1190 * tracing_snapshot - take a snapshot of the current buffer.
1191 *
1192 * This causes a swap between the snapshot buffer and the current live
1193 * tracing buffer. You can use this to take snapshots of the live
1194 * trace when some condition is triggered, but continue to trace.
1195 *
1196 * Note, make sure to allocate the snapshot with either
1197 * a tracing_snapshot_alloc(), or by doing it manually
1198 * with: echo 1 > /sys/kernel/tracing/snapshot
1199 *
1200 * If the snapshot buffer is not allocated, it will stop tracing.
1201 * Basically making a permanent snapshot.
1202 */
tracing_snapshot(void)1203 void tracing_snapshot(void)
1204 {
1205 struct trace_array *tr = &global_trace;
1206
1207 tracing_snapshot_instance(tr);
1208 }
1209 EXPORT_SYMBOL_GPL(tracing_snapshot);
1210
/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr:		The tracing instance to snapshot
 * @cond_data:	The data to be tested conditionally, and possibly saved
 *
 * Behaves like tracing_snapshot(), except that the snapshot is only
 * taken when the instance's cond_snapshot.update() callback, which is
 * handed @cond_data, returns true. The callback decides from @cond_data
 * whether to snapshot and, if it does, presumably stores the data along
 * with the snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	/* All checks and the actual swap live in the shared helper */
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1229
1230 /**
1231 * tracing_cond_snapshot_data - get the user data associated with a snapshot
1232 * @tr: The tracing instance
1233 *
1234 * When the user enables a conditional snapshot using
1235 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1236 * with the snapshot. This accessor is used to retrieve it.
1237 *
1238 * Should not be called from cond_snapshot.update(), since it takes
1239 * the tr->max_lock lock, which the code calling
1240 * cond_snapshot.update() has already done.
1241 *
1242 * Returns the cond_data associated with the trace array's snapshot.
1243 */
tracing_cond_snapshot_data(struct trace_array * tr)1244 void *tracing_cond_snapshot_data(struct trace_array *tr)
1245 {
1246 void *cond_data = NULL;
1247
1248 local_irq_disable();
1249 arch_spin_lock(&tr->max_lock);
1250
1251 if (tr->cond_snapshot)
1252 cond_data = tr->cond_snapshot->cond_data;
1253
1254 arch_spin_unlock(&tr->max_lock);
1255 local_irq_enable();
1256
1257 return cond_data;
1258 }
1259 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1260
/*
 * Forward declarations: both helpers are used by the snapshot
 * allocation/free code below before their definitions appear.
 */
static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
					struct array_buffer *size_buf, int cpu_id);
static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1264
tracing_alloc_snapshot_instance(struct trace_array * tr)1265 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1266 {
1267 int ret;
1268
1269 if (!tr->allocated_snapshot) {
1270
1271 /* allocate spare buffer */
1272 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1273 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1274 if (ret < 0)
1275 return ret;
1276
1277 tr->allocated_snapshot = true;
1278 }
1279
1280 return 0;
1281 }
1282
free_snapshot(struct trace_array * tr)1283 static void free_snapshot(struct trace_array *tr)
1284 {
1285 /*
1286 * We don't free the ring buffer. instead, resize it because
1287 * The max_tr ring buffer has some state (e.g. ring->clock) and
1288 * we want preserve it.
1289 */
1290 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1291 set_buffer_entries(&tr->max_buffer, 1);
1292 tracing_reset_online_cpus(&tr->max_buffer);
1293 tr->allocated_snapshot = false;
1294 }
1295
1296 /**
1297 * tracing_alloc_snapshot - allocate snapshot buffer.
1298 *
1299 * This only allocates the snapshot buffer if it isn't already
1300 * allocated - it doesn't also take a snapshot.
1301 *
1302 * This is meant to be used in cases where the snapshot buffer needs
1303 * to be set up for events that can't sleep but need to be able to
1304 * trigger a snapshot.
1305 */
tracing_alloc_snapshot(void)1306 int tracing_alloc_snapshot(void)
1307 {
1308 struct trace_array *tr = &global_trace;
1309 int ret;
1310
1311 ret = tracing_alloc_snapshot_instance(tr);
1312 WARN_ON(ret < 0);
1313
1314 return ret;
1315 }
1316 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1317
/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * Like tracing_snapshot(), but the snapshot buffer is allocated first
 * when it does not exist yet. Only use this where sleeping is safe, as
 * the allocation may sleep.
 *
 * The snapshot swaps the snapshot buffer with the live tracing buffer,
 * so the live trace can be captured when some condition triggers while
 * tracing continues. If the allocation fails no snapshot is taken.
 */
void tracing_snapshot_alloc(void)
{
	if (tracing_alloc_snapshot() < 0)
		return;

	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1340
1341 /**
1342 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1343 * @tr: The tracing instance
1344 * @cond_data: User data to associate with the snapshot
1345 * @update: Implementation of the cond_snapshot update function
1346 *
1347 * Check whether the conditional snapshot for the given instance has
1348 * already been enabled, or if the current tracer is already using a
1349 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1350 * save the cond_data and update function inside.
1351 *
1352 * Returns 0 if successful, error otherwise.
1353 */
tracing_snapshot_cond_enable(struct trace_array * tr,void * cond_data,cond_update_fn_t update)1354 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1355 cond_update_fn_t update)
1356 {
1357 struct cond_snapshot *cond_snapshot;
1358 int ret = 0;
1359
1360 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1361 if (!cond_snapshot)
1362 return -ENOMEM;
1363
1364 cond_snapshot->cond_data = cond_data;
1365 cond_snapshot->update = update;
1366
1367 mutex_lock(&trace_types_lock);
1368
1369 ret = tracing_alloc_snapshot_instance(tr);
1370 if (ret)
1371 goto fail_unlock;
1372
1373 if (tr->current_trace->use_max_tr) {
1374 ret = -EBUSY;
1375 goto fail_unlock;
1376 }
1377
1378 /*
1379 * The cond_snapshot can only change to NULL without the
1380 * trace_types_lock. We don't care if we race with it going
1381 * to NULL, but we want to make sure that it's not set to
1382 * something other than NULL when we get here, which we can
1383 * do safely with only holding the trace_types_lock and not
1384 * having to take the max_lock.
1385 */
1386 if (tr->cond_snapshot) {
1387 ret = -EBUSY;
1388 goto fail_unlock;
1389 }
1390
1391 local_irq_disable();
1392 arch_spin_lock(&tr->max_lock);
1393 tr->cond_snapshot = cond_snapshot;
1394 arch_spin_unlock(&tr->max_lock);
1395 local_irq_enable();
1396
1397 mutex_unlock(&trace_types_lock);
1398
1399 return ret;
1400
1401 fail_unlock:
1402 mutex_unlock(&trace_types_lock);
1403 kfree(cond_snapshot);
1404 return ret;
1405 }
1406 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1407
1408 /**
1409 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1410 * @tr: The tracing instance
1411 *
1412 * Check whether the conditional snapshot for the given instance is
1413 * enabled; if so, free the cond_snapshot associated with it,
1414 * otherwise return -EINVAL.
1415 *
1416 * Returns 0 if successful, error otherwise.
1417 */
tracing_snapshot_cond_disable(struct trace_array * tr)1418 int tracing_snapshot_cond_disable(struct trace_array *tr)
1419 {
1420 int ret = 0;
1421
1422 local_irq_disable();
1423 arch_spin_lock(&tr->max_lock);
1424
1425 if (!tr->cond_snapshot)
1426 ret = -EINVAL;
1427 else {
1428 kfree(tr->cond_snapshot);
1429 tr->cond_snapshot = NULL;
1430 }
1431
1432 arch_spin_unlock(&tr->max_lock);
1433 local_irq_enable();
1434
1435 return ret;
1436 }
1437 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1438 #else
/*
 * Stub used when CONFIG_TRACER_SNAPSHOT is not set: there is nothing to
 * snapshot, so only warn (once) that the feature was used.
 */
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
/*
 * Stub used when CONFIG_TRACER_SNAPSHOT is not set: ignores both
 * arguments and only warns (once) that the feature was used.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
/*
 * Stub used when CONFIG_TRACER_SNAPSHOT is not set: warns (once) and
 * reports -ENODEV since no snapshot buffer can be allocated.
 */
int tracing_alloc_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
/*
 * Stub used when CONFIG_TRACER_SNAPSHOT is not set: defers to the
 * tracing_snapshot() stub purely for its warning.
 */
void tracing_snapshot_alloc(void)
{
	/* Give warning */
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
/*
 * Stub used when CONFIG_TRACER_SNAPSHOT is not set: there is never any
 * cond_data to return.
 */
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
	return NULL;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
/*
 * Stub used when CONFIG_TRACER_SNAPSHOT is not set: conditional
 * snapshots cannot be enabled, so always report -ENODEV.
 */
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
{
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
/*
 * Stub used when CONFIG_TRACER_SNAPSHOT is not set: there is nothing to
 * disable, so @tr is ignored and 0 is returned.
 */
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	return 0;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
/* Without CONFIG_TRACER_SNAPSHOT, free_snapshot() expands to an empty statement. */
#define free_snapshot(tr)	do { } while (0)
1477 #endif /* CONFIG_TRACER_SNAPSHOT */
1478
tracer_tracing_off(struct trace_array * tr)1479 void tracer_tracing_off(struct trace_array *tr)
1480 {
1481 if (tr->array_buffer.buffer)
1482 ring_buffer_record_off(tr->array_buffer.buffer);
1483 /*
1484 * This flag is looked at when buffers haven't been allocated
1485 * yet, or by some tracers (like irqsoff), that just want to
1486 * know if the ring buffer has been disabled, but it can handle
1487 * races of where it gets disabled but we still do a record.
1488 * As the check is in the fast path of the tracers, it is more
1489 * important to be fast than accurate.
1490 */
1491 tr->buffer_disabled = 1;
1492 /* Make the flag seen by readers */
1493 smp_wmb();
1494 }
1495
1496 /**
1497 * tracing_off - turn off tracing buffers
1498 *
1499 * This function stops the tracing buffers from recording data.
1500 * It does not disable any overhead the tracers themselves may
1501 * be causing. This function simply causes all recording to
1502 * the ring buffers to fail.
1503 */
tracing_off(void)1504 void tracing_off(void)
1505 {
1506 tracer_tracing_off(&global_trace);
1507 }
1508 EXPORT_SYMBOL_GPL(tracing_off);
1509
disable_trace_on_warning(void)1510 void disable_trace_on_warning(void)
1511 {
1512 if (__disable_trace_on_warning) {
1513 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1514 "Disabling tracing due to warning\n");
1515 tracing_off();
1516 }
1517 }
1518
1519 /**
1520 * tracer_tracing_is_on - show real state of ring buffer enabled
1521 * @tr : the trace array to know if ring buffer is enabled
1522 *
1523 * Shows real state of the ring buffer if it is enabled or not.
1524 */
tracer_tracing_is_on(struct trace_array * tr)1525 bool tracer_tracing_is_on(struct trace_array *tr)
1526 {
1527 if (tr->array_buffer.buffer)
1528 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1529 return !tr->buffer_disabled;
1530 }
1531
1532 /**
1533 * tracing_is_on - show state of ring buffers enabled
1534 */
tracing_is_on(void)1535 int tracing_is_on(void)
1536 {
1537 return tracer_tracing_is_on(&global_trace);
1538 }
1539
set_buf_size(char * str)1540 static int __init set_buf_size(char *str)
1541 {
1542 unsigned long buf_size;
1543
1544 if (!str)
1545 return 0;
1546 buf_size = memparse(str, &str);
1547 /*
1548 * nr_entries can not be zero and the startup
1549 * tests require some buffer space. Therefore
1550 * ensure we have at least 4096 bytes of buffer.
1551 */
1552 trace_buf_size = max(4096UL, buf_size);
1553 return 1;
1554 }
1555 __setup("trace_buf_size=", set_buf_size);
1556
set_tracing_thresh(char * str)1557 static int __init set_tracing_thresh(char *str)
1558 {
1559 unsigned long threshold;
1560 int ret;
1561
1562 if (!str)
1563 return 0;
1564 ret = kstrtoul(str, 0, &threshold);
1565 if (ret < 0)
1566 return 0;
1567 tracing_thresh = threshold * 1000;
1568 return 1;
1569 }
1570 __setup("tracing_thresh=", set_tracing_thresh);
1571
/* Convert @nsecs to microseconds; the remainder is dropped. */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	const unsigned long nsecs_per_usec = 1000;

	return nsecs / nsecs_per_usec;
}
1576
/*
 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
 * of strings in the order that the evals (enum) were defined.
 */
#undef C
#define C(a, b) b

/*
 * Names of the trace options, NULL terminated.
 * These must match the bit positions in trace_iterator_flags.
 */
static const char *trace_options[] = {
	TRACE_FLAGS
	NULL
};
1591
/*
 * Table of the clocks a trace instance can use; an instance's clock_id
 * is used as an index into this array (see trace_clock_in_ns()).
 */
static struct {
	u64 (*func)(void);	/* function that reads the clock */
	const char *name;	/* name of the clock */
	int in_ns;		/* is this clock in nanoseconds? */
} trace_clocks[] = {
	{ trace_clock_local,		"local",	1 },
	{ trace_clock_global,		"global",	1 },
	{ trace_clock_counter,		"counter",	0 },
	{ trace_clock_jiffies,		"uptime",	0 },
	{ trace_clock,			"perf",		1 },
	{ ktime_get_mono_fast_ns,	"mono",		1 },
	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
	{ ktime_get_boot_fast_ns,	"boot",		1 },
	{ ktime_get_tai_fast_ns,	"tai",		1 },
	ARCH_TRACE_CLOCKS
};
1608
trace_clock_in_ns(struct trace_array * tr)1609 bool trace_clock_in_ns(struct trace_array *tr)
1610 {
1611 if (trace_clocks[tr->clock_id].in_ns)
1612 return true;
1613
1614 return false;
1615 }
1616
1617 /*
1618 * trace_parser_get_init - gets the buffer for trace parser
1619 */
trace_parser_get_init(struct trace_parser * parser,int size)1620 int trace_parser_get_init(struct trace_parser *parser, int size)
1621 {
1622 memset(parser, 0, sizeof(*parser));
1623
1624 parser->buffer = kmalloc(size, GFP_KERNEL);
1625 if (!parser->buffer)
1626 return 1;
1627
1628 parser->size = size;
1629 return 0;
1630 }
1631
1632 /*
1633 * trace_parser_put - frees the buffer for trace parser
1634 */
trace_parser_put(struct trace_parser * parser)1635 void trace_parser_put(struct trace_parser *parser)
1636 {
1637 kfree(parser->buffer);
1638 parser->buffer = NULL;
1639 }
1640
1641 /*
1642 * trace_get_user - reads the user input string separated by space
1643 * (matched by isspace(ch))
1644 *
1645 * For each string found the 'struct trace_parser' is updated,
1646 * and the function returns.
1647 *
1648 * Returns number of bytes read.
1649 *
1650 * See kernel/trace/trace.h for 'struct trace_parser' details.
1651 */
trace_get_user(struct trace_parser * parser,const char __user * ubuf,size_t cnt,loff_t * ppos)1652 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1653 size_t cnt, loff_t *ppos)
1654 {
1655 char ch;
1656 size_t read = 0;
1657 ssize_t ret;
1658
1659 if (!*ppos)
1660 trace_parser_clear(parser);
1661
1662 ret = get_user(ch, ubuf++);
1663 if (ret)
1664 goto out;
1665
1666 read++;
1667 cnt--;
1668
1669 /*
1670 * The parser is not finished with the last write,
1671 * continue reading the user input without skipping spaces.
1672 */
1673 if (!parser->cont) {
1674 /* skip white space */
1675 while (cnt && isspace(ch)) {
1676 ret = get_user(ch, ubuf++);
1677 if (ret)
1678 goto out;
1679 read++;
1680 cnt--;
1681 }
1682
1683 parser->idx = 0;
1684
1685 /* only spaces were written */
1686 if (isspace(ch) || !ch) {
1687 *ppos += read;
1688 ret = read;
1689 goto out;
1690 }
1691 }
1692
1693 /* read the non-space input */
1694 while (cnt && !isspace(ch) && ch) {
1695 if (parser->idx < parser->size - 1)
1696 parser->buffer[parser->idx++] = ch;
1697 else {
1698 ret = -EINVAL;
1699 goto out;
1700 }
1701 ret = get_user(ch, ubuf++);
1702 if (ret)
1703 goto out;
1704 read++;
1705 cnt--;
1706 }
1707
1708 /* We either got finished input or we have to wait for another call. */
1709 if (isspace(ch) || !ch) {
1710 parser->buffer[parser->idx] = 0;
1711 parser->cont = false;
1712 } else if (parser->idx < parser->size - 1) {
1713 parser->cont = true;
1714 parser->buffer[parser->idx++] = ch;
1715 /* Make sure the parsed string always terminates with '\0'. */
1716 parser->buffer[parser->idx] = 0;
1717 } else {
1718 ret = -EINVAL;
1719 goto out;
1720 }
1721
1722 *ppos += read;
1723 ret = read;
1724
1725 out:
1726 return ret;
1727 }
1728
1729 /* TODO add a seq_buf_to_buffer() */
trace_seq_to_buffer(struct trace_seq * s,void * buf,size_t cnt)1730 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1731 {
1732 int len;
1733
1734 if (trace_seq_used(s) <= s->seq.readpos)
1735 return -EBUSY;
1736
1737 len = trace_seq_used(s) - s->seq.readpos;
1738 if (cnt > len)
1739 cnt = len;
1740 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1741
1742 s->seq.readpos += cnt;
1743 return cnt;
1744 }
1745
/* Tracing threshold; the "tracing_thresh=" boot parameter stores its value times 1000 here. */
unsigned long __read_mostly	tracing_thresh;
1747
1748 #ifdef CONFIG_TRACER_MAX_TRACE
/*
 * Declared early so the code creating the "tracing_max_latency" file can
 * reference it; the initialised definition is expected elsewhere in this
 * file (not visible here).
 */
static const struct file_operations tracing_max_lat_fops;
1750
1751 #ifdef LATENCY_FS_NOTIFY
1752
/*
 * Workqueue on which the latency fsnotify work runs; stays NULL until
 * latency_fsnotify_init() has allocated it.
 */
static struct workqueue_struct *fsnotify_wq;
1754
latency_fsnotify_workfn(struct work_struct * work)1755 static void latency_fsnotify_workfn(struct work_struct *work)
1756 {
1757 struct trace_array *tr = container_of(work, struct trace_array,
1758 fsnotify_work);
1759 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1760 }
1761
latency_fsnotify_workfn_irq(struct irq_work * iwork)1762 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1763 {
1764 struct trace_array *tr = container_of(iwork, struct trace_array,
1765 fsnotify_irqwork);
1766 queue_work(fsnotify_wq, &tr->fsnotify_work);
1767 }
1768
trace_create_maxlat_file(struct trace_array * tr,struct dentry * d_tracer)1769 static void trace_create_maxlat_file(struct trace_array *tr,
1770 struct dentry *d_tracer)
1771 {
1772 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1773 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1774 tr->d_max_latency = trace_create_file("tracing_max_latency",
1775 TRACE_MODE_WRITE,
1776 d_tracer, tr,
1777 &tracing_max_lat_fops);
1778 }
1779
latency_fsnotify_init(void)1780 __init static int latency_fsnotify_init(void)
1781 {
1782 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1783 WQ_UNBOUND | WQ_HIGHPRI, 0);
1784 if (!fsnotify_wq) {
1785 pr_err("Unable to allocate tr_max_lat_wq\n");
1786 return -ENOMEM;
1787 }
1788 return 0;
1789 }
1790
1791 late_initcall_sync(latency_fsnotify_init);
1792
latency_fsnotify(struct trace_array * tr)1793 void latency_fsnotify(struct trace_array *tr)
1794 {
1795 if (!fsnotify_wq)
1796 return;
1797 /*
1798 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1799 * possible that we are called from __schedule() or do_idle(), which
1800 * could cause a deadlock.
1801 */
1802 irq_work_queue(&tr->fsnotify_irqwork);
1803 }
1804
1805 #else /* !LATENCY_FS_NOTIFY */
1806
/*
 * Without LATENCY_FS_NOTIFY there is no notification state to set up:
 * just create the "tracing_max_latency" file.
 */
#define trace_create_maxlat_file(tr, d_tracer)				\
	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
			  d_tracer, tr, &tracing_max_lat_fops)
1810
1811 #endif
1812
1813 /*
1814 * Copy the new maximum trace into the separate maximum-trace
1815 * structure. (this way the maximum trace is permanently saved,
1816 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1817 */
1818 static void
__update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu)1819 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1820 {
1821 struct array_buffer *trace_buf = &tr->array_buffer;
1822 struct array_buffer *max_buf = &tr->max_buffer;
1823 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1824 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1825
1826 max_buf->cpu = cpu;
1827 max_buf->time_start = data->preempt_timestamp;
1828
1829 max_data->saved_latency = tr->max_latency;
1830 max_data->critical_start = data->critical_start;
1831 max_data->critical_end = data->critical_end;
1832
1833 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1834 max_data->pid = tsk->pid;
1835 /*
1836 * If tsk == current, then use current_uid(), as that does not use
1837 * RCU. The irq tracer can be called out of RCU scope.
1838 */
1839 if (tsk == current)
1840 max_data->uid = current_uid();
1841 else
1842 max_data->uid = task_uid(tsk);
1843
1844 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1845 max_data->policy = tsk->policy;
1846 max_data->rt_priority = tsk->rt_priority;
1847
1848 /* record this tasks comm */
1849 tracing_record_cmdline(tsk);
1850 latency_fsnotify(tr);
1851 }
1852
1853 /**
1854 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1855 * @tr: tracer
1856 * @tsk: the task with the latency
1857 * @cpu: The cpu that initiated the trace.
1858 * @cond_data: User data associated with a conditional snapshot
1859 *
1860 * Flip the buffers between the @tr and the max_tr and record information
1861 * about which task was the cause of this latency.
1862 */
1863 void
update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu,void * cond_data)1864 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1865 void *cond_data)
1866 {
1867 if (tr->stop_count)
1868 return;
1869
1870 WARN_ON_ONCE(!irqs_disabled());
1871
1872 if (!tr->allocated_snapshot) {
1873 /* Only the nop tracer should hit this when disabling */
1874 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1875 return;
1876 }
1877
1878 arch_spin_lock(&tr->max_lock);
1879
1880 /* Inherit the recordable setting from array_buffer */
1881 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1882 ring_buffer_record_on(tr->max_buffer.buffer);
1883 else
1884 ring_buffer_record_off(tr->max_buffer.buffer);
1885
1886 #ifdef CONFIG_TRACER_SNAPSHOT
1887 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1888 arch_spin_unlock(&tr->max_lock);
1889 return;
1890 }
1891 #endif
1892 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1893
1894 __update_max_tr(tr, tsk, cpu);
1895
1896 arch_spin_unlock(&tr->max_lock);
1897
1898 /* Any waiters on the old snapshot buffer need to wake up */
1899 ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1900 }
1901
1902 /**
1903 * update_max_tr_single - only copy one trace over, and reset the rest
1904 * @tr: tracer
1905 * @tsk: task with the latency
1906 * @cpu: the cpu of the buffer to copy.
1907 *
1908 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1909 */
1910 void
update_max_tr_single(struct trace_array * tr,struct task_struct * tsk,int cpu)1911 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1912 {
1913 int ret;
1914
1915 if (tr->stop_count)
1916 return;
1917
1918 WARN_ON_ONCE(!irqs_disabled());
1919 if (!tr->allocated_snapshot) {
1920 /* Only the nop tracer should hit this when disabling */
1921 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1922 return;
1923 }
1924
1925 arch_spin_lock(&tr->max_lock);
1926
1927 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1928
1929 if (ret == -EBUSY) {
1930 /*
1931 * We failed to swap the buffer due to a commit taking
1932 * place on this CPU. We fail to record, but we reset
1933 * the max trace buffer (no one writes directly to it)
1934 * and flag that it failed.
1935 * Another reason is resize is in progress.
1936 */
1937 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1938 "Failed to swap buffers due to commit or resize in progress\n");
1939 }
1940
1941 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1942
1943 __update_max_tr(tr, tsk, cpu);
1944 arch_spin_unlock(&tr->max_lock);
1945 }
1946
1947 #endif /* CONFIG_TRACER_MAX_TRACE */
1948
wait_on_pipe(struct trace_iterator * iter,int full)1949 static int wait_on_pipe(struct trace_iterator *iter, int full)
1950 {
1951 int ret;
1952
1953 /* Iterators are static, they should be filled or empty */
1954 if (trace_buffer_iter(iter, iter->cpu_file))
1955 return 0;
1956
1957 ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1958
1959 #ifdef CONFIG_TRACER_MAX_TRACE
1960 /*
1961 * Make sure this is still the snapshot buffer, as if a snapshot were
1962 * to happen, this would now be the main buffer.
1963 */
1964 if (iter->snapshot)
1965 iter->array_buffer = &iter->tr->max_buffer;
1966 #endif
1967 return ret;
1968 }
1969
1970 #ifdef CONFIG_FTRACE_STARTUP_TEST
/* Selftests are postponed instead of run while this is false */
static bool selftests_can_run;

/* One postponed selftest, linked on postponed_selftests */
struct trace_selftests {
	struct list_head		list;	/* link in postponed_selftests */
	struct tracer			*type;	/* tracer whose selftest is pending */
};

/* Tracers whose selftest was requested before selftests could run */
static LIST_HEAD(postponed_selftests);
1979
save_selftest(struct tracer * type)1980 static int save_selftest(struct tracer *type)
1981 {
1982 struct trace_selftests *selftest;
1983
1984 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1985 if (!selftest)
1986 return -ENOMEM;
1987
1988 selftest->type = type;
1989 list_add(&selftest->list, &postponed_selftests);
1990 return 0;
1991 }
1992
run_tracer_selftest(struct tracer * type)1993 static int run_tracer_selftest(struct tracer *type)
1994 {
1995 struct trace_array *tr = &global_trace;
1996 struct tracer *saved_tracer = tr->current_trace;
1997 int ret;
1998
1999 if (!type->selftest || tracing_selftest_disabled)
2000 return 0;
2001
2002 /*
2003 * If a tracer registers early in boot up (before scheduling is
2004 * initialized and such), then do not run its selftests yet.
2005 * Instead, run it a little later in the boot process.
2006 */
2007 if (!selftests_can_run)
2008 return save_selftest(type);
2009
2010 if (!tracing_is_on()) {
2011 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2012 type->name);
2013 return 0;
2014 }
2015
2016 /*
2017 * Run a selftest on this tracer.
2018 * Here we reset the trace buffer, and set the current
2019 * tracer to be this tracer. The tracer can then run some
2020 * internal tracing to verify that everything is in order.
2021 * If we fail, we do not register this tracer.
2022 */
2023 tracing_reset_online_cpus(&tr->array_buffer);
2024
2025 tr->current_trace = type;
2026
2027 #ifdef CONFIG_TRACER_MAX_TRACE
2028 if (type->use_max_tr) {
2029 /* If we expanded the buffers, make sure the max is expanded too */
2030 if (ring_buffer_expanded)
2031 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2032 RING_BUFFER_ALL_CPUS);
2033 tr->allocated_snapshot = true;
2034 }
2035 #endif
2036
2037 /* the test is responsible for initializing and enabling */
2038 pr_info("Testing tracer %s: ", type->name);
2039 ret = type->selftest(type, tr);
2040 /* the test is responsible for resetting too */
2041 tr->current_trace = saved_tracer;
2042 if (ret) {
2043 printk(KERN_CONT "FAILED!\n");
2044 /* Add the warning after printing 'FAILED' */
2045 WARN_ON(1);
2046 return -1;
2047 }
2048 /* Only reset on passing, to avoid touching corrupted buffers */
2049 tracing_reset_online_cpus(&tr->array_buffer);
2050
2051 #ifdef CONFIG_TRACER_MAX_TRACE
2052 if (type->use_max_tr) {
2053 tr->allocated_snapshot = false;
2054
2055 /* Shrink the max buffer again */
2056 if (ring_buffer_expanded)
2057 ring_buffer_resize(tr->max_buffer.buffer, 1,
2058 RING_BUFFER_ALL_CPUS);
2059 }
2060 #endif
2061
2062 printk(KERN_CONT "PASSED\n");
2063 return 0;
2064 }
2065
do_run_tracer_selftest(struct tracer * type)2066 static int do_run_tracer_selftest(struct tracer *type)
2067 {
2068 int ret;
2069
2070 /*
2071 * Tests can take a long time, especially if they are run one after the
2072 * other, as does happen during bootup when all the tracers are
2073 * registered. This could cause the soft lockup watchdog to trigger.
2074 */
2075 cond_resched();
2076
2077 tracing_selftest_running = true;
2078 ret = run_tracer_selftest(type);
2079 tracing_selftest_running = false;
2080
2081 return ret;
2082 }
2083
init_trace_selftests(void)2084 static __init int init_trace_selftests(void)
2085 {
2086 struct trace_selftests *p, *n;
2087 struct tracer *t, **last;
2088 int ret;
2089
2090 selftests_can_run = true;
2091
2092 mutex_lock(&trace_types_lock);
2093
2094 if (list_empty(&postponed_selftests))
2095 goto out;
2096
2097 pr_info("Running postponed tracer tests:\n");
2098
2099 tracing_selftest_running = true;
2100 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2101 /* This loop can take minutes when sanitizers are enabled, so
2102 * lets make sure we allow RCU processing.
2103 */
2104 cond_resched();
2105 ret = run_tracer_selftest(p->type);
2106 /* If the test fails, then warn and remove from available_tracers */
2107 if (ret < 0) {
2108 WARN(1, "tracer: %s failed selftest, disabling\n",
2109 p->type->name);
2110 last = &trace_types;
2111 for (t = trace_types; t; t = t->next) {
2112 if (t == p->type) {
2113 *last = t->next;
2114 break;
2115 }
2116 last = &t->next;
2117 }
2118 }
2119 list_del(&p->list);
2120 kfree(p);
2121 }
2122 tracing_selftest_running = false;
2123
2124 out:
2125 mutex_unlock(&trace_types_lock);
2126
2127 return 0;
2128 }
2129 core_initcall(init_trace_selftests);
2130 #else
/* Without CONFIG_FTRACE_STARTUP_TEST there is no selftest to run: report success. */
static inline int run_tracer_selftest(struct tracer *type)
{
	return 0;
}
/* Without CONFIG_FTRACE_STARTUP_TEST this is a no-op that reports success. */
static inline int do_run_tracer_selftest(struct tracer *type)
{
	return 0;
}
2139 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2140
2141 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2142
2143 static void __init apply_trace_boot_options(void);
2144
2145 /**
2146 * register_tracer - register a tracer with the ftrace system.
2147 * @type: the plugin for the tracer
2148 *
2149 * Register a new plugin tracer.
2150 */
register_tracer(struct tracer * type)2151 int __init register_tracer(struct tracer *type)
2152 {
2153 struct tracer *t;
2154 int ret = 0;
2155
2156 if (!type->name) {
2157 pr_info("Tracer must have a name\n");
2158 return -1;
2159 }
2160
2161 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2162 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2163 return -1;
2164 }
2165
2166 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2167 pr_warn("Can not register tracer %s due to lockdown\n",
2168 type->name);
2169 return -EPERM;
2170 }
2171
2172 mutex_lock(&trace_types_lock);
2173
2174 for (t = trace_types; t; t = t->next) {
2175 if (strcmp(type->name, t->name) == 0) {
2176 /* already found */
2177 pr_info("Tracer %s already registered\n",
2178 type->name);
2179 ret = -1;
2180 goto out;
2181 }
2182 }
2183
2184 if (!type->set_flag)
2185 type->set_flag = &dummy_set_flag;
2186 if (!type->flags) {
2187 /*allocate a dummy tracer_flags*/
2188 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2189 if (!type->flags) {
2190 ret = -ENOMEM;
2191 goto out;
2192 }
2193 type->flags->val = 0;
2194 type->flags->opts = dummy_tracer_opt;
2195 } else
2196 if (!type->flags->opts)
2197 type->flags->opts = dummy_tracer_opt;
2198
2199 /* store the tracer for __set_tracer_option */
2200 type->flags->trace = type;
2201
2202 ret = do_run_tracer_selftest(type);
2203 if (ret < 0)
2204 goto out;
2205
2206 type->next = trace_types;
2207 trace_types = type;
2208 add_tracer_options(&global_trace, type);
2209
2210 out:
2211 mutex_unlock(&trace_types_lock);
2212
2213 if (ret || !default_bootup_tracer)
2214 goto out_unlock;
2215
2216 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2217 goto out_unlock;
2218
2219 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2220 /* Do we want this tracer to start on bootup? */
2221 tracing_set_tracer(&global_trace, type->name);
2222 default_bootup_tracer = NULL;
2223
2224 apply_trace_boot_options();
2225
2226 /* disable other selftests, since this will break it. */
2227 disable_tracing_selftest("running a tracer");
2228
2229 out_unlock:
2230 return ret;
2231 }
2232
tracing_reset_cpu(struct array_buffer * buf,int cpu)2233 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2234 {
2235 struct trace_buffer *buffer = buf->buffer;
2236
2237 if (!buffer)
2238 return;
2239
2240 ring_buffer_record_disable(buffer);
2241
2242 /* Make sure all commits have finished */
2243 synchronize_rcu();
2244 ring_buffer_reset_cpu(buffer, cpu);
2245
2246 ring_buffer_record_enable(buffer);
2247 }
2248
tracing_reset_online_cpus(struct array_buffer * buf)2249 void tracing_reset_online_cpus(struct array_buffer *buf)
2250 {
2251 struct trace_buffer *buffer = buf->buffer;
2252
2253 if (!buffer)
2254 return;
2255
2256 ring_buffer_record_disable(buffer);
2257
2258 /* Make sure all commits have finished */
2259 synchronize_rcu();
2260
2261 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2262
2263 ring_buffer_reset_online_cpus(buffer);
2264
2265 ring_buffer_record_enable(buffer);
2266 }
2267
2268 /* Must have trace_types_lock held */
tracing_reset_all_online_cpus_unlocked(void)2269 void tracing_reset_all_online_cpus_unlocked(void)
2270 {
2271 struct trace_array *tr;
2272
2273 lockdep_assert_held(&trace_types_lock);
2274
2275 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2276 if (!tr->clear_trace)
2277 continue;
2278 tr->clear_trace = false;
2279 tracing_reset_online_cpus(&tr->array_buffer);
2280 #ifdef CONFIG_TRACER_MAX_TRACE
2281 tracing_reset_online_cpus(&tr->max_buffer);
2282 #endif
2283 }
2284 }
2285
tracing_reset_all_online_cpus(void)2286 void tracing_reset_all_online_cpus(void)
2287 {
2288 mutex_lock(&trace_types_lock);
2289 tracing_reset_all_online_cpus_unlocked();
2290 mutex_unlock(&trace_types_lock);
2291 }
2292
/*
 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
 * is the tgid last observed corresponding to pid=i.
 */
static int *tgid_map;

/* The maximum valid index into tgid_map. */
static size_t tgid_map_max;

/* Number of comm slots requested by trace_create_savedcmd() */
#define SAVED_CMDLINES_DEFAULT 128
/* Marks an unused entry in either of the pid <-> slot maps */
#define NO_CMDLINE_MAP UINT_MAX
/*
 * Preemption must be disabled before acquiring trace_cmdline_lock.
 * The various trace_arrays' max_lock must be acquired in a context
 * where interrupts are disabled.
 */
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
/*
 * Cache of task comms: a set of cmdline_num slots of TASK_COMM_LEN bytes
 * each, plus the two maps used to go from a pid to its slot and back.
 */
struct saved_cmdlines_buffer {
	/* slot for (pid & (PID_MAX_DEFAULT - 1)), or NO_CMDLINE_MAP */
	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
	/* pid whose comm is stored in each slot; cmdline_num entries */
	unsigned *map_cmdline_to_pid;
	/* number of slots in saved_cmdlines[] */
	unsigned cmdline_num;
	/* slot most recently handed out by trace_save_cmdline() */
	int cmdline_idx;
	/* the comm storage itself, TASK_COMM_LEN bytes per slot */
	char saved_cmdlines[];
};
/* The buffer in use; set by trace_create_savedcmd() */
static struct saved_cmdlines_buffer *savedcmd;
2318
get_saved_cmdlines(int idx)2319 static inline char *get_saved_cmdlines(int idx)
2320 {
2321 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2322 }
2323
set_cmdline(int idx,const char * cmdline)2324 static inline void set_cmdline(int idx, const char *cmdline)
2325 {
2326 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2327 }
2328
free_saved_cmdlines_buffer(struct saved_cmdlines_buffer * s)2329 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
2330 {
2331 int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
2332
2333 kfree(s->map_cmdline_to_pid);
2334 kmemleak_free(s);
2335 free_pages((unsigned long)s, order);
2336 }
2337
allocate_cmdlines_buffer(unsigned int val)2338 static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
2339 {
2340 struct saved_cmdlines_buffer *s;
2341 struct page *page;
2342 int orig_size, size;
2343 int order;
2344
2345 /* Figure out how much is needed to hold the given number of cmdlines */
2346 orig_size = sizeof(*s) + val * TASK_COMM_LEN;
2347 order = get_order(orig_size);
2348 size = 1 << (order + PAGE_SHIFT);
2349 page = alloc_pages(GFP_KERNEL, order);
2350 if (!page)
2351 return NULL;
2352
2353 s = page_address(page);
2354 kmemleak_alloc(s, size, 1, GFP_KERNEL);
2355 memset(s, 0, sizeof(*s));
2356
2357 /* Round up to actual allocation */
2358 val = (size - sizeof(*s)) / TASK_COMM_LEN;
2359 s->cmdline_num = val;
2360
2361 s->map_cmdline_to_pid = kmalloc_array(val,
2362 sizeof(*s->map_cmdline_to_pid),
2363 GFP_KERNEL);
2364 if (!s->map_cmdline_to_pid) {
2365 free_saved_cmdlines_buffer(s);
2366 return NULL;
2367 }
2368
2369 s->cmdline_idx = 0;
2370 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2371 sizeof(s->map_pid_to_cmdline));
2372 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2373 val * sizeof(*s->map_cmdline_to_pid));
2374
2375 return s;
2376 }
2377
trace_create_savedcmd(void)2378 static int trace_create_savedcmd(void)
2379 {
2380 savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
2381
2382 return savedcmd ? 0 : -ENOMEM;
2383 }
2384
is_tracing_stopped(void)2385 int is_tracing_stopped(void)
2386 {
2387 return global_trace.stop_count;
2388 }
2389
tracing_start_tr(struct trace_array * tr)2390 static void tracing_start_tr(struct trace_array *tr)
2391 {
2392 struct trace_buffer *buffer;
2393 unsigned long flags;
2394
2395 if (tracing_disabled)
2396 return;
2397
2398 raw_spin_lock_irqsave(&tr->start_lock, flags);
2399 if (--tr->stop_count) {
2400 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2401 /* Someone screwed up their debugging */
2402 tr->stop_count = 0;
2403 }
2404 goto out;
2405 }
2406
2407 /* Prevent the buffers from switching */
2408 arch_spin_lock(&tr->max_lock);
2409
2410 buffer = tr->array_buffer.buffer;
2411 if (buffer)
2412 ring_buffer_record_enable(buffer);
2413
2414 #ifdef CONFIG_TRACER_MAX_TRACE
2415 buffer = tr->max_buffer.buffer;
2416 if (buffer)
2417 ring_buffer_record_enable(buffer);
2418 #endif
2419
2420 arch_spin_unlock(&tr->max_lock);
2421
2422 out:
2423 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2424 }
2425
2426 /**
2427 * tracing_start - quick start of the tracer
2428 *
2429 * If tracing is enabled but was stopped by tracing_stop,
2430 * this will start the tracer back up.
2431 */
tracing_start(void)2432 void tracing_start(void)
2433
2434 {
2435 return tracing_start_tr(&global_trace);
2436 }
2437
tracing_stop_tr(struct trace_array * tr)2438 static void tracing_stop_tr(struct trace_array *tr)
2439 {
2440 struct trace_buffer *buffer;
2441 unsigned long flags;
2442
2443 raw_spin_lock_irqsave(&tr->start_lock, flags);
2444 if (tr->stop_count++)
2445 goto out;
2446
2447 /* Prevent the buffers from switching */
2448 arch_spin_lock(&tr->max_lock);
2449
2450 buffer = tr->array_buffer.buffer;
2451 if (buffer)
2452 ring_buffer_record_disable(buffer);
2453
2454 #ifdef CONFIG_TRACER_MAX_TRACE
2455 buffer = tr->max_buffer.buffer;
2456 if (buffer)
2457 ring_buffer_record_disable(buffer);
2458 #endif
2459
2460 arch_spin_unlock(&tr->max_lock);
2461
2462 out:
2463 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2464 }
2465
2466 /**
2467 * tracing_stop - quick stop of the tracer
2468 *
2469 * Light weight way to stop tracing. Use in conjunction with
2470 * tracing_start.
2471 */
tracing_stop(void)2472 void tracing_stop(void)
2473 {
2474 return tracing_stop_tr(&global_trace);
2475 }
2476
trace_save_cmdline(struct task_struct * tsk)2477 static int trace_save_cmdline(struct task_struct *tsk)
2478 {
2479 unsigned tpid, idx;
2480
2481 /* treat recording of idle task as a success */
2482 if (!tsk->pid)
2483 return 1;
2484
2485 tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2486
2487 /*
2488 * It's not the end of the world if we don't get
2489 * the lock, but we also don't want to spin
2490 * nor do we want to disable interrupts,
2491 * so if we miss here, then better luck next time.
2492 *
2493 * This is called within the scheduler and wake up, so interrupts
2494 * had better been disabled and run queue lock been held.
2495 */
2496 lockdep_assert_preemption_disabled();
2497 if (!arch_spin_trylock(&trace_cmdline_lock))
2498 return 0;
2499
2500 idx = savedcmd->map_pid_to_cmdline[tpid];
2501 if (idx == NO_CMDLINE_MAP) {
2502 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2503
2504 savedcmd->map_pid_to_cmdline[tpid] = idx;
2505 savedcmd->cmdline_idx = idx;
2506 }
2507
2508 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2509 set_cmdline(idx, tsk->comm);
2510
2511 arch_spin_unlock(&trace_cmdline_lock);
2512
2513 return 1;
2514 }
2515
__trace_find_cmdline(int pid,char comm[])2516 static void __trace_find_cmdline(int pid, char comm[])
2517 {
2518 unsigned map;
2519 int tpid;
2520
2521 if (!pid) {
2522 strcpy(comm, "<idle>");
2523 return;
2524 }
2525
2526 if (WARN_ON_ONCE(pid < 0)) {
2527 strcpy(comm, "<XXX>");
2528 return;
2529 }
2530
2531 tpid = pid & (PID_MAX_DEFAULT - 1);
2532 map = savedcmd->map_pid_to_cmdline[tpid];
2533 if (map != NO_CMDLINE_MAP) {
2534 tpid = savedcmd->map_cmdline_to_pid[map];
2535 if (tpid == pid) {
2536 strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2537 return;
2538 }
2539 }
2540 strcpy(comm, "<...>");
2541 }
2542
trace_find_cmdline(int pid,char comm[])2543 void trace_find_cmdline(int pid, char comm[])
2544 {
2545 preempt_disable();
2546 arch_spin_lock(&trace_cmdline_lock);
2547
2548 __trace_find_cmdline(pid, comm);
2549
2550 arch_spin_unlock(&trace_cmdline_lock);
2551 preempt_enable();
2552 }
2553
trace_find_tgid_ptr(int pid)2554 static int *trace_find_tgid_ptr(int pid)
2555 {
2556 /*
2557 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2558 * if we observe a non-NULL tgid_map then we also observe the correct
2559 * tgid_map_max.
2560 */
2561 int *map = smp_load_acquire(&tgid_map);
2562
2563 if (unlikely(!map || pid > tgid_map_max))
2564 return NULL;
2565
2566 return &map[pid];
2567 }
2568
/* Return the tgid recorded for @pid, or 0 if there is no map entry for it. */
int trace_find_tgid(int pid)
{
	int *slot = trace_find_tgid_ptr(pid);

	if (!slot)
		return 0;

	return *slot;
}
2575
trace_save_tgid(struct task_struct * tsk)2576 static int trace_save_tgid(struct task_struct *tsk)
2577 {
2578 int *ptr;
2579
2580 /* treat recording of idle task as a success */
2581 if (!tsk->pid)
2582 return 1;
2583
2584 ptr = trace_find_tgid_ptr(tsk->pid);
2585 if (!ptr)
2586 return 0;
2587
2588 *ptr = tsk->tgid;
2589 return 1;
2590 }
2591
tracing_record_taskinfo_skip(int flags)2592 static bool tracing_record_taskinfo_skip(int flags)
2593 {
2594 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2595 return true;
2596 if (!__this_cpu_read(trace_taskinfo_save))
2597 return true;
2598 return false;
2599 }
2600
2601 /**
2602 * tracing_record_taskinfo - record the task info of a task
2603 *
2604 * @task: task to record
2605 * @flags: TRACE_RECORD_CMDLINE for recording comm
2606 * TRACE_RECORD_TGID for recording tgid
2607 */
tracing_record_taskinfo(struct task_struct * task,int flags)2608 void tracing_record_taskinfo(struct task_struct *task, int flags)
2609 {
2610 bool done;
2611
2612 if (tracing_record_taskinfo_skip(flags))
2613 return;
2614
2615 /*
2616 * Record as much task information as possible. If some fail, continue
2617 * to try to record the others.
2618 */
2619 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2620 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2621
2622 /* If recording any information failed, retry again soon. */
2623 if (!done)
2624 return;
2625
2626 __this_cpu_write(trace_taskinfo_save, false);
2627 }
2628
2629 /**
2630 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2631 *
2632 * @prev: previous task during sched_switch
2633 * @next: next task during sched_switch
2634 * @flags: TRACE_RECORD_CMDLINE for recording comm
2635 * TRACE_RECORD_TGID for recording tgid
2636 */
tracing_record_taskinfo_sched_switch(struct task_struct * prev,struct task_struct * next,int flags)2637 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2638 struct task_struct *next, int flags)
2639 {
2640 bool done;
2641
2642 if (tracing_record_taskinfo_skip(flags))
2643 return;
2644
2645 /*
2646 * Record as much task information as possible. If some fail, continue
2647 * to try to record the others.
2648 */
2649 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2650 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2651 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2652 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2653
2654 /* If recording any information failed, retry again soon. */
2655 if (!done)
2656 return;
2657
2658 __this_cpu_write(trace_taskinfo_save, false);
2659 }
2660
2661 /* Helpers to record a specific task information */
tracing_record_cmdline(struct task_struct * task)2662 void tracing_record_cmdline(struct task_struct *task)
2663 {
2664 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2665 }
2666
tracing_record_tgid(struct task_struct * task)2667 void tracing_record_tgid(struct task_struct *task)
2668 {
2669 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2670 }
2671
2672 /*
2673 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2674 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2675 * simplifies those functions and keeps them in sync.
2676 */
trace_handle_return(struct trace_seq * s)2677 enum print_line_t trace_handle_return(struct trace_seq *s)
2678 {
2679 return trace_seq_has_overflowed(s) ?
2680 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2681 }
2682 EXPORT_SYMBOL_GPL(trace_handle_return);
2683
/* Return current->migration_disabled on SMP builds, 0 otherwise. */
static unsigned short migration_disable_value(void)
{
	unsigned short disabled = 0;

#if defined(CONFIG_SMP)
	disabled = current->migration_disabled;
#endif

	return disabled;
}
2692
tracing_gen_ctx_irq_test(unsigned int irqs_status)2693 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2694 {
2695 unsigned int trace_flags = irqs_status;
2696 unsigned int pc;
2697
2698 pc = preempt_count();
2699
2700 if (pc & NMI_MASK)
2701 trace_flags |= TRACE_FLAG_NMI;
2702 if (pc & HARDIRQ_MASK)
2703 trace_flags |= TRACE_FLAG_HARDIRQ;
2704 if (in_serving_softirq())
2705 trace_flags |= TRACE_FLAG_SOFTIRQ;
2706 if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2707 trace_flags |= TRACE_FLAG_BH_OFF;
2708
2709 if (tif_need_resched())
2710 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2711 if (test_preempt_need_resched())
2712 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2713 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2714 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2715 }
2716
/*
 * Out-of-line wrapper around __trace_buffer_lock_reserve(); returns
 * whatever that helper returns for the same arguments.
 */
struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer *buffer,
			  int type,
			  unsigned long len,
			  unsigned int trace_ctx)
{
	struct ring_buffer_event *event;

	event = __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);

	return event;
}
2725
/* Per-CPU temporary event page; set by trace_buffered_event_enable() */
DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
/* Per-CPU use count of the page above; a user owns it when its increment returns 1 */
DEFINE_PER_CPU(int, trace_buffered_event_cnt);
/* Number of enable calls not yet matched by a disable; changed under event_mutex */
static int trace_buffered_event_ref;
2729
2730 /**
2731 * trace_buffered_event_enable - enable buffering events
2732 *
2733 * When events are being filtered, it is quicker to use a temporary
2734 * buffer to write the event data into if there's a likely chance
2735 * that it will not be committed. The discard of the ring buffer
2736 * is not as fast as committing, and is much slower than copying
2737 * a commit.
2738 *
2739 * When an event is to be filtered, allocate per cpu buffers to
2740 * write the event data into, and if the event is filtered and discarded
2741 * it is simply dropped, otherwise, the entire data is to be committed
2742 * in one shot.
2743 */
trace_buffered_event_enable(void)2744 void trace_buffered_event_enable(void)
2745 {
2746 struct ring_buffer_event *event;
2747 struct page *page;
2748 int cpu;
2749
2750 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2751
2752 if (trace_buffered_event_ref++)
2753 return;
2754
2755 for_each_tracing_cpu(cpu) {
2756 page = alloc_pages_node(cpu_to_node(cpu),
2757 GFP_KERNEL | __GFP_NORETRY, 0);
2758 /* This is just an optimization and can handle failures */
2759 if (!page) {
2760 pr_err("Failed to allocate event buffer\n");
2761 break;
2762 }
2763
2764 event = page_address(page);
2765 memset(event, 0, sizeof(*event));
2766
2767 per_cpu(trace_buffered_event, cpu) = event;
2768
2769 preempt_disable();
2770 if (cpu == smp_processor_id() &&
2771 __this_cpu_read(trace_buffered_event) !=
2772 per_cpu(trace_buffered_event, cpu))
2773 WARN_ON_ONCE(1);
2774 preempt_enable();
2775 }
2776 }
2777
enable_trace_buffered_event(void * data)2778 static void enable_trace_buffered_event(void *data)
2779 {
2780 /* Probably not needed, but do it anyway */
2781 smp_rmb();
2782 this_cpu_dec(trace_buffered_event_cnt);
2783 }
2784
disable_trace_buffered_event(void * data)2785 static void disable_trace_buffered_event(void *data)
2786 {
2787 this_cpu_inc(trace_buffered_event_cnt);
2788 }
2789
2790 /**
2791 * trace_buffered_event_disable - disable buffering events
2792 *
2793 * When a filter is removed, it is faster to not use the buffered
2794 * events, and to commit directly into the ring buffer. Free up
2795 * the temp buffers when there are no more users. This requires
2796 * special synchronization with current events.
2797 */
trace_buffered_event_disable(void)2798 void trace_buffered_event_disable(void)
2799 {
2800 int cpu;
2801
2802 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2803
2804 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2805 return;
2806
2807 if (--trace_buffered_event_ref)
2808 return;
2809
2810 /* For each CPU, set the buffer as used. */
2811 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2812 NULL, true);
2813
2814 /* Wait for all current users to finish */
2815 synchronize_rcu();
2816
2817 for_each_tracing_cpu(cpu) {
2818 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2819 per_cpu(trace_buffered_event, cpu) = NULL;
2820 }
2821
2822 /*
2823 * Wait for all CPUs that potentially started checking if they can use
2824 * their event buffer only after the previous synchronize_rcu() call and
2825 * they still read a valid pointer from trace_buffered_event. It must be
2826 * ensured they don't see cleared trace_buffered_event_cnt else they
2827 * could wrongly decide to use the pointed-to buffer which is now freed.
2828 */
2829 synchronize_rcu();
2830
2831 /* For each CPU, relinquish the buffer */
2832 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2833 true);
2834 }
2835
/*
 * Fallback buffer that trace_event_buffer_lock_reserve() reserves from
 * when the trace array's own buffer yields no event and the event file
 * has EVENT_FILE_FL_TRIGGER_COND set.
 */
static struct trace_buffer *temp_buffer;
2837
2838 struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct trace_buffer ** current_rb,struct trace_event_file * trace_file,int type,unsigned long len,unsigned int trace_ctx)2839 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2840 struct trace_event_file *trace_file,
2841 int type, unsigned long len,
2842 unsigned int trace_ctx)
2843 {
2844 struct ring_buffer_event *entry;
2845 struct trace_array *tr = trace_file->tr;
2846 int val;
2847
2848 *current_rb = tr->array_buffer.buffer;
2849
2850 if (!tr->no_filter_buffering_ref &&
2851 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2852 preempt_disable_notrace();
2853 /*
2854 * Filtering is on, so try to use the per cpu buffer first.
2855 * This buffer will simulate a ring_buffer_event,
2856 * where the type_len is zero and the array[0] will
2857 * hold the full length.
2858 * (see include/linux/ring-buffer.h for details on
2859 * how the ring_buffer_event is structured).
2860 *
2861 * Using a temp buffer during filtering and copying it
2862 * on a matched filter is quicker than writing directly
2863 * into the ring buffer and then discarding it when
2864 * it doesn't match. That is because the discard
2865 * requires several atomic operations to get right.
2866 * Copying on match and doing nothing on a failed match
2867 * is still quicker than no copy on match, but having
2868 * to discard out of the ring buffer on a failed match.
2869 */
2870 if ((entry = __this_cpu_read(trace_buffered_event))) {
2871 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2872
2873 val = this_cpu_inc_return(trace_buffered_event_cnt);
2874
2875 /*
2876 * Preemption is disabled, but interrupts and NMIs
2877 * can still come in now. If that happens after
2878 * the above increment, then it will have to go
2879 * back to the old method of allocating the event
2880 * on the ring buffer, and if the filter fails, it
2881 * will have to call ring_buffer_discard_commit()
2882 * to remove it.
2883 *
2884 * Need to also check the unlikely case that the
2885 * length is bigger than the temp buffer size.
2886 * If that happens, then the reserve is pretty much
2887 * guaranteed to fail, as the ring buffer currently
2888 * only allows events less than a page. But that may
2889 * change in the future, so let the ring buffer reserve
2890 * handle the failure in that case.
2891 */
2892 if (val == 1 && likely(len <= max_len)) {
2893 trace_event_setup(entry, type, trace_ctx);
2894 entry->array[0] = len;
2895 /* Return with preemption disabled */
2896 return entry;
2897 }
2898 this_cpu_dec(trace_buffered_event_cnt);
2899 }
2900 /* __trace_buffer_lock_reserve() disables preemption */
2901 preempt_enable_notrace();
2902 }
2903
2904 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2905 trace_ctx);
2906 /*
2907 * If tracing is off, but we have triggers enabled
2908 * we still need to look at the event data. Use the temp_buffer
2909 * to store the trace event for the trigger to use. It's recursive
2910 * safe and will not be recorded anywhere.
2911 */
2912 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2913 *current_rb = temp_buffer;
2914 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2915 trace_ctx);
2916 }
2917 return entry;
2918 }
2919 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2920
/* Held by output_printk() while it formats into tracepoint_print_iter */
static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
/* Held by tracepoint_printk_sysctl() across its update of tracepoint_printk */
static DEFINE_MUTEX(tracepoint_printk_mutex);
2923
output_printk(struct trace_event_buffer * fbuffer)2924 static void output_printk(struct trace_event_buffer *fbuffer)
2925 {
2926 struct trace_event_call *event_call;
2927 struct trace_event_file *file;
2928 struct trace_event *event;
2929 unsigned long flags;
2930 struct trace_iterator *iter = tracepoint_print_iter;
2931
2932 /* We should never get here if iter is NULL */
2933 if (WARN_ON_ONCE(!iter))
2934 return;
2935
2936 event_call = fbuffer->trace_file->event_call;
2937 if (!event_call || !event_call->event.funcs ||
2938 !event_call->event.funcs->trace)
2939 return;
2940
2941 file = fbuffer->trace_file;
2942 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2943 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2944 !filter_match_preds(file->filter, fbuffer->entry)))
2945 return;
2946
2947 event = &fbuffer->trace_file->event_call->event;
2948
2949 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2950 trace_seq_init(&iter->seq);
2951 iter->ent = fbuffer->entry;
2952 event_call->event.funcs->trace(iter, 0, event);
2953 trace_seq_putc(&iter->seq, 0);
2954 printk("%s", iter->seq.buffer);
2955
2956 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2957 }
2958
/*
 * sysctl handler for tracepoint_printk: runs proc_dointvec() and then
 * brings tracepoint_printk_key in line with the (possibly new) value.
 *
 * Returns the result of proc_dointvec().
 */
int tracepoint_printk_sysctl(struct ctl_table *table, int write,
			     void *buffer, size_t *lenp,
			     loff_t *ppos)
{
	int old_value;
	int ret;

	mutex_lock(&tracepoint_printk_mutex);
	old_value = tracepoint_printk;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);

	/*
	 * This will force exiting early, as tracepoint_printk
	 * is always zero when tracepoint_printk_iter is not allocated
	 */
	if (!tracepoint_print_iter)
		tracepoint_printk = 0;

	/* Flip the static key only if the value really changed */
	if (old_value != tracepoint_printk) {
		if (tracepoint_printk)
			static_key_enable(&tracepoint_printk_key.key);
		else
			static_key_disable(&tracepoint_printk_key.key);
	}

	mutex_unlock(&tracepoint_printk_mutex);

	return ret;
}
2991
trace_event_buffer_commit(struct trace_event_buffer * fbuffer)2992 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2993 {
2994 enum event_trigger_type tt = ETT_NONE;
2995 struct trace_event_file *file = fbuffer->trace_file;
2996
2997 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2998 fbuffer->entry, &tt))
2999 goto discard;
3000
3001 if (static_key_false(&tracepoint_printk_key.key))
3002 output_printk(fbuffer);
3003
3004 if (static_branch_unlikely(&trace_event_exports_enabled))
3005 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3006
3007 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3008 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3009
3010 discard:
3011 if (tt)
3012 event_triggers_post_call(file, tt);
3013
3014 }
3015 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3016
3017 /*
3018 * Skip 3:
3019 *
3020 * trace_buffer_unlock_commit_regs()
3021 * trace_event_buffer_commit()
3022 * trace_event_raw_event_xxx()
3023 */
3024 # define STACK_SKIP 3
3025
trace_buffer_unlock_commit_regs(struct trace_array * tr,struct trace_buffer * buffer,struct ring_buffer_event * event,unsigned int trace_ctx,struct pt_regs * regs)3026 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3027 struct trace_buffer *buffer,
3028 struct ring_buffer_event *event,
3029 unsigned int trace_ctx,
3030 struct pt_regs *regs)
3031 {
3032 __buffer_unlock_commit(buffer, event);
3033
3034 /*
3035 * If regs is not set, then skip the necessary functions.
3036 * Note, we can still get here via blktrace, wakeup tracer
3037 * and mmiotrace, but that's ok if they lose a function or
3038 * two. They are not that meaningful.
3039 */
3040 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3041 ftrace_trace_userstack(tr, buffer, trace_ctx);
3042 }
3043
/*
 * Like trace_buffer_unlock_commit_regs(), except that no stack trace is
 * recorded: @event is only committed to @buffer.
 */
void
trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
				   struct ring_buffer_event *event)
{
	__buffer_unlock_commit(buffer, event);
}
3053
3054 void
trace_function(struct trace_array * tr,unsigned long ip,unsigned long parent_ip,unsigned int trace_ctx)3055 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3056 parent_ip, unsigned int trace_ctx)
3057 {
3058 struct trace_event_call *call = &event_function;
3059 struct trace_buffer *buffer = tr->array_buffer.buffer;
3060 struct ring_buffer_event *event;
3061 struct ftrace_entry *entry;
3062
3063 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3064 trace_ctx);
3065 if (!event)
3066 return;
3067 entry = ring_buffer_event_data(event);
3068 entry->ip = ip;
3069 entry->parent_ip = parent_ip;
3070
3071 if (!call_filter_check_discard(call, entry, buffer, event)) {
3072 if (static_branch_unlikely(&trace_function_exports_enabled))
3073 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3074 __buffer_unlock_commit(buffer, event);
3075 }
3076 }
3077
3078 #ifdef CONFIG_STACKTRACE
3079
/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
#define FTRACE_KSTACK_NESTING 4

/* Number of stack entries available to each nesting level */
#define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)

/* Scratch area a single stack trace is saved into before being copied out */
struct ftrace_stack {
	unsigned long calls[FTRACE_KSTACK_ENTRIES];
};


/* One scratch stack per nesting level */
struct ftrace_stacks {
	struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
};

/* Per-CPU scratch stacks, indexed by the current nesting level */
static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
/* Per-CPU count of scratch stacks currently in use on that CPU */
static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3096
/*
 * Record a kernel stack trace as a TRACE_STACK event in @buffer.
 *
 * The trace is first saved into a per-CPU scratch stack selected by the
 * current nesting level, then copied into a ring buffer event sized for
 * exactly the number of entries that were saved.
 *
 * @buffer:    ring buffer to write the event into
 * @trace_ctx: context value passed through to the event reservation
 * @skip:      number of leading stack entries to leave out
 * @regs:      if set, the stack is unwound from these registers instead
 *             of from the current location
 */
static void __ftrace_trace_stack(struct trace_buffer *buffer,
				 unsigned int trace_ctx,
				 int skip, struct pt_regs *regs)
{
	struct trace_event_call *call = &event_kernel_stack;
	struct ring_buffer_event *event;
	unsigned int size, nr_entries;
	struct ftrace_stack *fstack;
	struct stack_entry *entry;
	int stackidx;

	/*
	 * Add one, for this function and the call to save_stack_trace()
	 * If regs is set, then these functions will not be in the way.
	 */
#ifndef CONFIG_UNWINDER_ORC
	if (!regs)
		skip++;
#endif

	/* Stay on this CPU while its scratch stack is in use */
	preempt_disable_notrace();

	/* Claim the next free nesting level on this CPU */
	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;

	/* This should never happen. If it does, yell once and skip */
	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
		goto out;

	/*
	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
	 * interrupt will either see the value pre increment or post
	 * increment. If the interrupt happens pre increment it will have
	 * restored the counter when it returns. We just need a barrier to
	 * keep gcc from moving things around.
	 */
	barrier();

	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
	size = ARRAY_SIZE(fstack->calls);

	if (regs) {
		nr_entries = stack_trace_save_regs(regs, fstack->calls,
						   size, skip);
	} else {
		nr_entries = stack_trace_save(fstack->calls, size, skip);
	}

	/* Reserve an event just large enough for the saved entries */
	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
				    struct_size(entry, caller, nr_entries),
				    trace_ctx);
	if (!event)
		goto out;
	entry = ring_buffer_event_data(event);

	entry->size = nr_entries;
	memcpy(&entry->caller, fstack->calls,
	       flex_array_size(entry, caller, nr_entries));

	if (!call_filter_check_discard(call, entry, buffer, event))
		__buffer_unlock_commit(buffer, event);

 out:
	/* Again, don't let gcc optimize things here */
	barrier();
	/* Release the nesting level claimed above */
	__this_cpu_dec(ftrace_stack_reserve);
	preempt_enable_notrace();

}
3165
ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)3166 static inline void ftrace_trace_stack(struct trace_array *tr,
3167 struct trace_buffer *buffer,
3168 unsigned int trace_ctx,
3169 int skip, struct pt_regs *regs)
3170 {
3171 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3172 return;
3173
3174 __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3175 }
3176
__trace_stack(struct trace_array * tr,unsigned int trace_ctx,int skip)3177 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3178 int skip)
3179 {
3180 struct trace_buffer *buffer = tr->array_buffer.buffer;
3181
3182 if (rcu_is_watching()) {
3183 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3184 return;
3185 }
3186
3187 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3188 return;
3189
3190 /*
3191 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3192 * but if the above rcu_is_watching() failed, then the NMI
3193 * triggered someplace critical, and ct_irq_enter() should
3194 * not be called from NMI.
3195 */
3196 if (unlikely(in_nmi()))
3197 return;
3198
3199 ct_irq_enter_irqson();
3200 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3201 ct_irq_exit_irqson();
3202 }
3203
3204 /**
3205 * trace_dump_stack - record a stack back trace in the trace buffer
3206 * @skip: Number of functions to skip (helper handlers)
3207 */
trace_dump_stack(int skip)3208 void trace_dump_stack(int skip)
3209 {
3210 if (tracing_disabled || tracing_selftest_running)
3211 return;
3212
3213 #ifndef CONFIG_UNWINDER_ORC
3214 /* Skip 1 to skip this function. */
3215 skip++;
3216 #endif
3217 __ftrace_trace_stack(global_trace.array_buffer.buffer,
3218 tracing_gen_ctx(), skip, NULL);
3219 }
3220 EXPORT_SYMBOL_GPL(trace_dump_stack);
3221
3222 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3223 static DEFINE_PER_CPU(int, user_stack_count);
3224
3225 static void
ftrace_trace_userstack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx)3226 ftrace_trace_userstack(struct trace_array *tr,
3227 struct trace_buffer *buffer, unsigned int trace_ctx)
3228 {
3229 struct trace_event_call *call = &event_user_stack;
3230 struct ring_buffer_event *event;
3231 struct userstack_entry *entry;
3232
3233 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3234 return;
3235
3236 /*
3237 * NMIs can not handle page faults, even with fix ups.
3238 * The save user stack can (and often does) fault.
3239 */
3240 if (unlikely(in_nmi()))
3241 return;
3242
3243 /*
3244 * prevent recursion, since the user stack tracing may
3245 * trigger other kernel events.
3246 */
3247 preempt_disable();
3248 if (__this_cpu_read(user_stack_count))
3249 goto out;
3250
3251 __this_cpu_inc(user_stack_count);
3252
3253 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3254 sizeof(*entry), trace_ctx);
3255 if (!event)
3256 goto out_drop_count;
3257 entry = ring_buffer_event_data(event);
3258
3259 entry->tgid = current->tgid;
3260 memset(&entry->caller, 0, sizeof(entry->caller));
3261
3262 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3263 if (!call_filter_check_discard(call, entry, buffer, event))
3264 __buffer_unlock_commit(buffer, event);
3265
3266 out_drop_count:
3267 __this_cpu_dec(user_stack_count);
3268 out:
3269 preempt_enable();
3270 }
3271 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
/*
 * Stub used when CONFIG_USER_STACKTRACE_SUPPORT is not set: no user
 * stack trace is recorded.
 */
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx)
{
}
3277 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3278
3279 #endif /* CONFIG_STACKTRACE */
3280
3281 static inline void
func_repeats_set_delta_ts(struct func_repeats_entry * entry,unsigned long long delta)3282 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3283 unsigned long long delta)
3284 {
3285 entry->bottom_delta_ts = delta & U32_MAX;
3286 entry->top_delta_ts = (delta >> 32);
3287 }
3288
trace_last_func_repeats(struct trace_array * tr,struct trace_func_repeats * last_info,unsigned int trace_ctx)3289 void trace_last_func_repeats(struct trace_array *tr,
3290 struct trace_func_repeats *last_info,
3291 unsigned int trace_ctx)
3292 {
3293 struct trace_buffer *buffer = tr->array_buffer.buffer;
3294 struct func_repeats_entry *entry;
3295 struct ring_buffer_event *event;
3296 u64 delta;
3297
3298 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3299 sizeof(*entry), trace_ctx);
3300 if (!event)
3301 return;
3302
3303 delta = ring_buffer_event_time_stamp(buffer, event) -
3304 last_info->ts_last_call;
3305
3306 entry = ring_buffer_event_data(event);
3307 entry->ip = last_info->ip;
3308 entry->parent_ip = last_info->parent_ip;
3309 entry->count = last_info->count;
3310 func_repeats_set_delta_ts(entry, delta);
3311
3312 __buffer_unlock_commit(buffer, event);
3313 }
3314
/* created for use with alloc_percpu */
struct trace_buffer_struct {
	/* Number of entries of @buffer currently handed out */
	int nesting;
	/* Scratch buffers of TRACE_BUF_SIZE bytes, one per nesting level */
	char buffer[4][TRACE_BUF_SIZE];
};

/* Per-CPU scratch buffers; NULL until alloc_percpu_trace_buffer() sets it */
static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3322
3323 /*
3324 * This allows for lockless recording. If we're nested too deeply, then
3325 * this returns NULL.
3326 */
get_trace_buf(void)3327 static char *get_trace_buf(void)
3328 {
3329 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3330
3331 if (!trace_percpu_buffer || buffer->nesting >= 4)
3332 return NULL;
3333
3334 buffer->nesting++;
3335
3336 /* Interrupts must see nesting incremented before we use the buffer */
3337 barrier();
3338 return &buffer->buffer[buffer->nesting - 1][0];
3339 }
3340
/*
 * Release the scratch buffer obtained from get_trace_buf() by dropping
 * this CPU's nesting count by one.
 */
static void put_trace_buf(void)
{
	/* Don't let the decrement of nesting leak before this */
	barrier();
	this_cpu_dec(trace_percpu_buffer->nesting);
}
3347
alloc_percpu_trace_buffer(void)3348 static int alloc_percpu_trace_buffer(void)
3349 {
3350 struct trace_buffer_struct __percpu *buffers;
3351
3352 if (trace_percpu_buffer)
3353 return 0;
3354
3355 buffers = alloc_percpu(struct trace_buffer_struct);
3356 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3357 return -ENOMEM;
3358
3359 trace_percpu_buffer = buffers;
3360 return 0;
3361 }
3362
/* Set once trace_printk_init_buffers() has completed its setup */
static int buffers_allocated;

/*
 * Set up everything trace_printk() needs: allocate the per-CPU scratch
 * buffers, print a prominent warning banner, expand the ring buffers and,
 * when the global buffer already exists, start cmdline recording.
 * Does nothing if it has already completed once, and gives up silently
 * (apart from the allocation failure report) if the buffers cannot be
 * allocated.
 */
void trace_printk_init_buffers(void)
{
	if (buffers_allocated)
		return;

	if (alloc_percpu_trace_buffer())
		return;

	/* trace_printk() is for debug use only. Don't use it in production. */

	pr_warn("\n");
	pr_warn("**********************************************************\n");
	pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
	pr_warn("** **\n");
	pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
	pr_warn("** **\n");
	pr_warn("** This means that this is a DEBUG kernel and it is **\n");
	pr_warn("** unsafe for production use. **\n");
	pr_warn("** **\n");
	pr_warn("** If you see this message and you are not debugging **\n");
	pr_warn("** the kernel, report this immediately to your vendor! **\n");
	pr_warn("** **\n");
	pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
	pr_warn("**********************************************************\n");

	/* Expand the buffers to set size */
	tracing_update_buffers();

	buffers_allocated = 1;

	/*
	 * trace_printk_init_buffers() can be called by modules.
	 * If that happens, then we need to start cmdline recording
	 * directly here. If the global_trace.buffer is already
	 * allocated here, then this was called by module code.
	 */
	if (global_trace.array_buffer.buffer)
		tracing_start_cmdline_record();
}
3404 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3405
trace_printk_start_comm(void)3406 void trace_printk_start_comm(void)
3407 {
3408 /* Start tracing comms if trace printk is set */
3409 if (!buffers_allocated)
3410 return;
3411 tracing_start_cmdline_record();
3412 }
3413
trace_printk_start_stop_comm(int enabled)3414 static void trace_printk_start_stop_comm(int enabled)
3415 {
3416 if (!buffers_allocated)
3417 return;
3418
3419 if (enabled)
3420 tracing_start_cmdline_record();
3421 else
3422 tracing_stop_cmdline_record();
3423 }
3424
3425 /**
3426 * trace_vbprintk - write binary msg to tracing buffer
3427 * @ip: The address of the caller
3428 * @fmt: The string format to write to the buffer
3429 * @args: Arguments for @fmt
3430 */
trace_vbprintk(unsigned long ip,const char * fmt,va_list args)3431 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3432 {
3433 struct trace_event_call *call = &event_bprint;
3434 struct ring_buffer_event *event;
3435 struct trace_buffer *buffer;
3436 struct trace_array *tr = &global_trace;
3437 struct bprint_entry *entry;
3438 unsigned int trace_ctx;
3439 char *tbuffer;
3440 int len = 0, size;
3441
3442 if (unlikely(tracing_selftest_running || tracing_disabled))
3443 return 0;
3444
3445 /* Don't pollute graph traces with trace_vprintk internals */
3446 pause_graph_tracing();
3447
3448 trace_ctx = tracing_gen_ctx();
3449 preempt_disable_notrace();
3450
3451 tbuffer = get_trace_buf();
3452 if (!tbuffer) {
3453 len = 0;
3454 goto out_nobuffer;
3455 }
3456
3457 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3458
3459 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3460 goto out_put;
3461
3462 size = sizeof(*entry) + sizeof(u32) * len;
3463 buffer = tr->array_buffer.buffer;
3464 ring_buffer_nest_start(buffer);
3465 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3466 trace_ctx);
3467 if (!event)
3468 goto out;
3469 entry = ring_buffer_event_data(event);
3470 entry->ip = ip;
3471 entry->fmt = fmt;
3472
3473 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3474 if (!call_filter_check_discard(call, entry, buffer, event)) {
3475 __buffer_unlock_commit(buffer, event);
3476 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3477 }
3478
3479 out:
3480 ring_buffer_nest_end(buffer);
3481 out_put:
3482 put_trace_buf();
3483
3484 out_nobuffer:
3485 preempt_enable_notrace();
3486 unpause_graph_tracing();
3487
3488 return len;
3489 }
3490 EXPORT_SYMBOL_GPL(trace_vbprintk);
3491
3492 __printf(3, 0)
3493 static int
__trace_array_vprintk(struct trace_buffer * buffer,unsigned long ip,const char * fmt,va_list args)3494 __trace_array_vprintk(struct trace_buffer *buffer,
3495 unsigned long ip, const char *fmt, va_list args)
3496 {
3497 struct trace_event_call *call = &event_print;
3498 struct ring_buffer_event *event;
3499 int len = 0, size;
3500 struct print_entry *entry;
3501 unsigned int trace_ctx;
3502 char *tbuffer;
3503
3504 if (tracing_disabled)
3505 return 0;
3506
3507 /* Don't pollute graph traces with trace_vprintk internals */
3508 pause_graph_tracing();
3509
3510 trace_ctx = tracing_gen_ctx();
3511 preempt_disable_notrace();
3512
3513
3514 tbuffer = get_trace_buf();
3515 if (!tbuffer) {
3516 len = 0;
3517 goto out_nobuffer;
3518 }
3519
3520 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3521
3522 size = sizeof(*entry) + len + 1;
3523 ring_buffer_nest_start(buffer);
3524 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3525 trace_ctx);
3526 if (!event)
3527 goto out;
3528 entry = ring_buffer_event_data(event);
3529 entry->ip = ip;
3530
3531 memcpy(&entry->buf, tbuffer, len + 1);
3532 if (!call_filter_check_discard(call, entry, buffer, event)) {
3533 __buffer_unlock_commit(buffer, event);
3534 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3535 }
3536
3537 out:
3538 ring_buffer_nest_end(buffer);
3539 put_trace_buf();
3540
3541 out_nobuffer:
3542 preempt_enable_notrace();
3543 unpause_graph_tracing();
3544
3545 return len;
3546 }
3547
3548 __printf(3, 0)
trace_array_vprintk(struct trace_array * tr,unsigned long ip,const char * fmt,va_list args)3549 int trace_array_vprintk(struct trace_array *tr,
3550 unsigned long ip, const char *fmt, va_list args)
3551 {
3552 if (tracing_selftest_running && tr == &global_trace)
3553 return 0;
3554
3555 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3556 }
3557
3558 /**
3559 * trace_array_printk - Print a message to a specific instance
3560 * @tr: The instance trace_array descriptor
3561 * @ip: The instruction pointer that this is called from.
3562 * @fmt: The format to print (printf format)
3563 *
3564 * If a subsystem sets up its own instance, they have the right to
3565 * printk strings into their tracing instance buffer using this
3566 * function. Note, this function will not write into the top level
3567 * buffer (use trace_printk() for that), as writing into the top level
3568 * buffer should only have events that can be individually disabled.
3569 * trace_printk() is only used for debugging a kernel, and should not
3570 * be ever incorporated in normal use.
3571 *
3572 * trace_array_printk() can be used, as it will not add noise to the
3573 * top level tracing buffer.
3574 *
3575 * Note, trace_array_init_printk() must be called on @tr before this
3576 * can be used.
3577 */
3578 __printf(3, 0)
trace_array_printk(struct trace_array * tr,unsigned long ip,const char * fmt,...)3579 int trace_array_printk(struct trace_array *tr,
3580 unsigned long ip, const char *fmt, ...)
3581 {
3582 int ret;
3583 va_list ap;
3584
3585 if (!tr)
3586 return -ENOENT;
3587
3588 /* This is only allowed for created instances */
3589 if (tr == &global_trace)
3590 return 0;
3591
3592 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3593 return 0;
3594
3595 va_start(ap, fmt);
3596 ret = trace_array_vprintk(tr, ip, fmt, ap);
3597 va_end(ap);
3598 return ret;
3599 }
3600 EXPORT_SYMBOL_GPL(trace_array_printk);
3601
3602 /**
3603 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3604 * @tr: The trace array to initialize the buffers for
3605 *
3606 * As trace_array_printk() only writes into instances, they are OK to
3607 * have in the kernel (unlike trace_printk()). This needs to be called
3608 * before trace_array_printk() can be used on a trace_array.
3609 */
trace_array_init_printk(struct trace_array * tr)3610 int trace_array_init_printk(struct trace_array *tr)
3611 {
3612 if (!tr)
3613 return -ENOENT;
3614
3615 /* This is only allowed for created instances */
3616 if (tr == &global_trace)
3617 return -EINVAL;
3618
3619 return alloc_percpu_trace_buffer();
3620 }
3621 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3622
3623 __printf(3, 4)
trace_array_printk_buf(struct trace_buffer * buffer,unsigned long ip,const char * fmt,...)3624 int trace_array_printk_buf(struct trace_buffer *buffer,
3625 unsigned long ip, const char *fmt, ...)
3626 {
3627 int ret;
3628 va_list ap;
3629
3630 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3631 return 0;
3632
3633 va_start(ap, fmt);
3634 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3635 va_end(ap);
3636 return ret;
3637 }
3638
/*
 * va_list form of printing a message into the top level trace array;
 * simply forwards to trace_array_vprintk() with &global_trace.
 */
__printf(2, 0)
int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
{
	return trace_array_vprintk(&global_trace, ip, fmt, args);
}
3644 EXPORT_SYMBOL_GPL(trace_vprintk);
3645
trace_iterator_increment(struct trace_iterator * iter)3646 static void trace_iterator_increment(struct trace_iterator *iter)
3647 {
3648 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3649
3650 iter->idx++;
3651 if (buf_iter)
3652 ring_buffer_iter_advance(buf_iter);
3653 }
3654
3655 static struct trace_entry *
peek_next_entry(struct trace_iterator * iter,int cpu,u64 * ts,unsigned long * lost_events)3656 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3657 unsigned long *lost_events)
3658 {
3659 struct ring_buffer_event *event;
3660 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3661
3662 if (buf_iter) {
3663 event = ring_buffer_iter_peek(buf_iter, ts);
3664 if (lost_events)
3665 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3666 (unsigned long)-1 : 0;
3667 } else {
3668 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3669 lost_events);
3670 }
3671
3672 if (event) {
3673 iter->ent_size = ring_buffer_event_length(event);
3674 return ring_buffer_event_data(event);
3675 }
3676 iter->ent_size = 0;
3677 return NULL;
3678 }
3679
3680 static struct trace_entry *
__find_next_entry(struct trace_iterator * iter,int * ent_cpu,unsigned long * missing_events,u64 * ent_ts)3681 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3682 unsigned long *missing_events, u64 *ent_ts)
3683 {
3684 struct trace_buffer *buffer = iter->array_buffer->buffer;
3685 struct trace_entry *ent, *next = NULL;
3686 unsigned long lost_events = 0, next_lost = 0;
3687 int cpu_file = iter->cpu_file;
3688 u64 next_ts = 0, ts;
3689 int next_cpu = -1;
3690 int next_size = 0;
3691 int cpu;
3692
3693 /*
3694 * If we are in a per_cpu trace file, don't bother by iterating over
3695 * all cpu and peek directly.
3696 */
3697 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3698 if (ring_buffer_empty_cpu(buffer, cpu_file))
3699 return NULL;
3700 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3701 if (ent_cpu)
3702 *ent_cpu = cpu_file;
3703
3704 return ent;
3705 }
3706
3707 for_each_tracing_cpu(cpu) {
3708
3709 if (ring_buffer_empty_cpu(buffer, cpu))
3710 continue;
3711
3712 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3713
3714 /*
3715 * Pick the entry with the smallest timestamp:
3716 */
3717 if (ent && (!next || ts < next_ts)) {
3718 next = ent;
3719 next_cpu = cpu;
3720 next_ts = ts;
3721 next_lost = lost_events;
3722 next_size = iter->ent_size;
3723 }
3724 }
3725
3726 iter->ent_size = next_size;
3727
3728 if (ent_cpu)
3729 *ent_cpu = next_cpu;
3730
3731 if (ent_ts)
3732 *ent_ts = next_ts;
3733
3734 if (missing_events)
3735 *missing_events = next_lost;
3736
3737 return next;
3738 }
3739
3740 #define STATIC_FMT_BUF_SIZE 128
3741 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3742
trace_iter_expand_format(struct trace_iterator * iter)3743 char *trace_iter_expand_format(struct trace_iterator *iter)
3744 {
3745 char *tmp;
3746
3747 /*
3748 * iter->tr is NULL when used with tp_printk, which makes
3749 * this get called where it is not safe to call krealloc().
3750 */
3751 if (!iter->tr || iter->fmt == static_fmt_buf)
3752 return NULL;
3753
3754 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3755 GFP_KERNEL);
3756 if (tmp) {
3757 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3758 iter->fmt = tmp;
3759 }
3760
3761 return tmp;
3762 }
3763
3764 /* Returns true if the string is safe to dereference from an event */
trace_safe_str(struct trace_iterator * iter,const char * str,bool star,int len)3765 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3766 bool star, int len)
3767 {
3768 unsigned long addr = (unsigned long)str;
3769 struct trace_event *trace_event;
3770 struct trace_event_call *event;
3771
3772 /* Ignore strings with no length */
3773 if (star && !len)
3774 return true;
3775
3776 /* OK if part of the event data */
3777 if ((addr >= (unsigned long)iter->ent) &&
3778 (addr < (unsigned long)iter->ent + iter->ent_size))
3779 return true;
3780
3781 /* OK if part of the temp seq buffer */
3782 if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3783 (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3784 return true;
3785
3786 /* Core rodata can not be freed */
3787 if (is_kernel_rodata(addr))
3788 return true;
3789
3790 if (trace_is_tracepoint_string(str))
3791 return true;
3792
3793 /*
3794 * Now this could be a module event, referencing core module
3795 * data, which is OK.
3796 */
3797 if (!iter->ent)
3798 return false;
3799
3800 trace_event = ftrace_find_event(iter->ent->type);
3801 if (!trace_event)
3802 return false;
3803
3804 event = container_of(trace_event, struct trace_event_call, event);
3805 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3806 return false;
3807
3808 /* Would rather have rodata, but this will suffice */
3809 if (within_module_core(addr, event->module))
3810 return true;
3811
3812 return false;
3813 }
3814
show_buffer(struct trace_seq * s)3815 static const char *show_buffer(struct trace_seq *s)
3816 {
3817 struct seq_buf *seq = &s->seq;
3818
3819 seq_buf_terminate(seq);
3820
3821 return seq->buffer;
3822 }
3823
/* When enabled, trace_check_vprintf() prints without verifying strings */
static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3825
/*
 * Probe how this architecture passes a va_list to vsnprintf().
 *
 * One "%d" is formatted from the argument list and then the next int is
 * fetched from the same va_list; that value is returned. Which argument
 * it is depends on whether vsnprintf() advanced the caller's va_list.
 */
static int test_can_verify_check(const char *fmt, ...)
{
	char scratch[16];
	va_list ap;
	int next;

	/*
	 * The verifier depends on vsnprintf() modifying the va_list that
	 * is passed to it, which happens when it is sent as a reference.
	 * Some architectures (like x86_32) pass it by value, which means
	 * that vsnprintf() does not modify the caller's va_list, and the
	 * verifier would then need to understand every conversion that
	 * vsnprintf() can consume. If it is passed by value, the verifier
	 * is disabled.
	 */
	va_start(ap, fmt);
	vsnprintf(scratch, sizeof(scratch), "%d", ap);
	next = va_arg(ap, int);
	va_end(ap);

	return next;
}
3848
test_can_verify(void)3849 static void test_can_verify(void)
3850 {
3851 if (!test_can_verify_check("%d %d", 0, 1)) {
3852 pr_info("trace event string verifier disabled\n");
3853 static_branch_inc(&trace_no_verify);
3854 }
3855 }
3856
/**
 * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
 * @iter: The iterator that holds the seq buffer and the event being printed
 * @fmt: The format used to print the event
 * @ap: The va_list holding the data to print from @fmt.
 *
 * This writes the data into the @iter->seq buffer using the data from
 * @fmt and @ap. If the format has a %s, then the source of the string
 * is examined to make sure it is safe to print, otherwise it will
 * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
 * pointer.
 *
 * The format is processed in pieces: everything up to a %s conversion is
 * printed as is, then the string argument is pulled from @ap, checked and
 * printed on its own, and scanning resumes after that conversion.
 * @iter->fmt is used as scratch space for the format pieces.
 */
void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
			 va_list ap)
{
	const char *p = fmt;
	const char *str;
	int i, j;

	if (WARN_ON_ONCE(!fmt))
		return;

	/* Verification is not possible on this architecture */
	if (static_branch_unlikely(&trace_no_verify))
		goto print;

	/* Don't bother checking when doing a ftrace_dump() */
	if (iter->fmt == static_fmt_buf)
		goto print;

	while (*p) {
		bool star = false;
		int len = 0;

		j = 0;

		/*
		 * We only care about %s and variants. On leaving this loop
		 * with a match, i is the offset of the '%' and j the offset
		 * from that '%' to the 's'.
		 */
		for (i = 0; p[i]; i++) {
			if (i + 1 >= iter->fmt_size) {
				/*
				 * If we can't expand the copy buffer,
				 * just print it.
				 */
				if (!trace_iter_expand_format(iter))
					goto print;
			}

			/* Step over a backslash together with the character after it */
			if (p[i] == '\\' && p[i+1]) {
				i++;
				continue;
			}
			if (p[i] == '%') {
				/* Need to test cases like %08.*s */
				for (j = 1; p[i+j]; j++) {
					if (isdigit(p[i+j]) ||
					    p[i+j] == '.')
						continue;
					if (p[i+j] == '*') {
						star = true;
						continue;
					}
					break;
				}
				if (p[i+j] == 's')
					break;
				/* Not a string conversion: forget any '*' seen */
				star = false;
			}
			j = 0;
		}
		/* If no %s found then just print normally */
		if (!p[i])
			break;

		/* Copy up to the %s, and print that */
		strncpy(iter->fmt, p, i);
		iter->fmt[i] = '\0';
		trace_seq_vprintf(&iter->seq, iter->fmt, ap);

		/*
		 * If iter->seq is full, the above call no longer guarantees
		 * that ap is in sync with fmt processing, and further calls
		 * to va_arg() can return wrong positional arguments.
		 *
		 * Ensure that ap is no longer used in this case.
		 */
		if (iter->seq.full) {
			p = "";
			break;
		}

		/* A '*' takes its length from the argument before the string */
		if (star)
			len = va_arg(ap, int);

		/* The ap now points to the string data of the %s */
		str = va_arg(ap, const char *);

		/*
		 * If you hit this warning, it is likely that the
		 * trace event in question used %s on a string that
		 * was saved at the time of the event, but may not be
		 * around when the trace is read. Use __string(),
		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
		 * instead. See samples/trace_events/trace-events-sample.h
		 * for reference.
		 */
		if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
			      "fmt: '%s' current_buffer: '%s'",
			      fmt, show_buffer(&iter->seq))) {
			int ret;

			/* Try to safely read the string */
			if (star) {
				/* Clamp len so the copy fits in iter->fmt */
				if (len + 1 > iter->fmt_size)
					len = iter->fmt_size - 1;
				if (len < 0)
					len = 0;
				ret = copy_from_kernel_nofault(iter->fmt, str, len);
				iter->fmt[len] = 0;
				star = false;
			} else {
				ret = strncpy_from_kernel_nofault(iter->fmt, str,
								  iter->fmt_size);
			}
			/* Show the pointer, plus its contents if they were readable */
			if (ret < 0)
				trace_seq_printf(&iter->seq, "(0x%px)", str);
			else
				trace_seq_printf(&iter->seq, "(0x%px:%s)",
						 str, iter->fmt);
			str = "[UNSAFE-MEMORY]";
			strcpy(iter->fmt, "%s");
		} else {
			/* Safe: print with the original conversion, '%' through 's' */
			strncpy(iter->fmt, p + i, j + 1);
			iter->fmt[j+1] = '\0';
		}
		if (star)
			trace_seq_printf(&iter->seq, iter->fmt, len, str);
		else
			trace_seq_printf(&iter->seq, iter->fmt, str);

		/* Resume scanning right after the 's' */
		p += i + j + 1;
	}
 print:
	/* Print whatever is left of the format without verification */
	if (*p)
		trace_seq_vprintf(&iter->seq, p, ap);
}
4001
trace_event_format(struct trace_iterator * iter,const char * fmt)4002 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
4003 {
4004 const char *p, *new_fmt;
4005 char *q;
4006
4007 if (WARN_ON_ONCE(!fmt))
4008 return fmt;
4009
4010 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
4011 return fmt;
4012
4013 p = fmt;
4014 new_fmt = q = iter->fmt;
4015 while (*p) {
4016 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4017 if (!trace_iter_expand_format(iter))
4018 return fmt;
4019
4020 q += iter->fmt - new_fmt;
4021 new_fmt = iter->fmt;
4022 }
4023
4024 *q++ = *p++;
4025
4026 /* Replace %p with %px */
4027 if (p[-1] == '%') {
4028 if (p[0] == '%') {
4029 *q++ = *p++;
4030 } else if (p[0] == 'p' && !isalnum(p[1])) {
4031 *q++ = *p++;
4032 *q++ = 'x';
4033 }
4034 }
4035 }
4036 *q = '\0';
4037
4038 return new_fmt;
4039 }
4040
4041 #define STATIC_TEMP_BUF_SIZE 128
4042 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4043
4044 /* Find the next real entry, without updating the iterator itself */
trace_find_next_entry(struct trace_iterator * iter,int * ent_cpu,u64 * ent_ts)4045 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4046 int *ent_cpu, u64 *ent_ts)
4047 {
4048 /* __find_next_entry will reset ent_size */
4049 int ent_size = iter->ent_size;
4050 struct trace_entry *entry;
4051
4052 /*
4053 * If called from ftrace_dump(), then the iter->temp buffer
4054 * will be the static_temp_buf and not created from kmalloc.
4055 * If the entry size is greater than the buffer, we can
4056 * not save it. Just return NULL in that case. This is only
4057 * used to add markers when two consecutive events' time
4058 * stamps have a large delta. See trace_print_lat_context()
4059 */
4060 if (iter->temp == static_temp_buf &&
4061 STATIC_TEMP_BUF_SIZE < ent_size)
4062 return NULL;
4063
4064 /*
4065 * The __find_next_entry() may call peek_next_entry(), which may
4066 * call ring_buffer_peek() that may make the contents of iter->ent
4067 * undefined. Need to copy iter->ent now.
4068 */
4069 if (iter->ent && iter->ent != iter->temp) {
4070 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4071 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4072 void *temp;
4073 temp = kmalloc(iter->ent_size, GFP_KERNEL);
4074 if (!temp)
4075 return NULL;
4076 kfree(iter->temp);
4077 iter->temp = temp;
4078 iter->temp_size = iter->ent_size;
4079 }
4080 memcpy(iter->temp, iter->ent, iter->ent_size);
4081 iter->ent = iter->temp;
4082 }
4083 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4084 /* Put back the original ent_size */
4085 iter->ent_size = ent_size;
4086
4087 return entry;
4088 }
4089
4090 /* Find the next real entry, and increment the iterator to the next entry */
trace_find_next_entry_inc(struct trace_iterator * iter)4091 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4092 {
4093 iter->ent = __find_next_entry(iter, &iter->cpu,
4094 &iter->lost_events, &iter->ts);
4095
4096 if (iter->ent)
4097 trace_iterator_increment(iter);
4098
4099 return iter->ent ? iter : NULL;
4100 }
4101
/*
 * Consume the next event of iter->cpu from the iterator's ring buffer,
 * storing its time stamp and lost event count in @iter.
 */
static void trace_consume(struct trace_iterator *iter)
{
	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
			    &iter->lost_events);
}
4107
/*
 * seq_file ->next callback: step the iterator up to index *pos.
 *
 * *pos is always incremented. Returns NULL if the iterator is already
 * past the requested index or if the entries run out; otherwise the
 * iterator itself (or what trace_find_next_entry_inc() returned).
 */
static void *s_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	int target = (int)*pos;
	void *cur;

	WARN_ON_ONCE(iter->leftover);

	(*pos)++;

	/* The iterator only moves forward. */
	if (iter->idx > target)
		return NULL;

	/* A negative index means nothing has been fetched yet. */
	cur = iter->idx < 0 ? trace_find_next_entry_inc(iter) : iter;

	while (cur && iter->idx < target)
		cur = trace_find_next_entry_inc(iter);

	iter->pos = *pos;

	return cur;
}
4134
tracing_iter_reset(struct trace_iterator * iter,int cpu)4135 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4136 {
4137 struct ring_buffer_iter *buf_iter;
4138 unsigned long entries = 0;
4139 u64 ts;
4140
4141 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4142
4143 buf_iter = trace_buffer_iter(iter, cpu);
4144 if (!buf_iter)
4145 return;
4146
4147 ring_buffer_iter_reset(buf_iter);
4148
4149 /*
4150 * We could have the case with the max latency tracers
4151 * that a reset never took place on a cpu. This is evident
4152 * by the timestamp being before the start of the buffer.
4153 */
4154 while (ring_buffer_iter_peek(buf_iter, &ts)) {
4155 if (ts >= iter->array_buffer->time_start)
4156 break;
4157 entries++;
4158 ring_buffer_iter_advance(buf_iter);
4159 }
4160
4161 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4162 }
4163
4164 /*
4165 * The current tracer is copied to avoid a global locking
4166 * all around.
4167 */
s_start(struct seq_file * m,loff_t * pos)4168 static void *s_start(struct seq_file *m, loff_t *pos)
4169 {
4170 struct trace_iterator *iter = m->private;
4171 struct trace_array *tr = iter->tr;
4172 int cpu_file = iter->cpu_file;
4173 void *p = NULL;
4174 loff_t l = 0;
4175 int cpu;
4176
4177 mutex_lock(&trace_types_lock);
4178 if (unlikely(tr->current_trace != iter->trace)) {
4179 /* Close iter->trace before switching to the new current tracer */
4180 if (iter->trace->close)
4181 iter->trace->close(iter);
4182 iter->trace = tr->current_trace;
4183 /* Reopen the new current tracer */
4184 if (iter->trace->open)
4185 iter->trace->open(iter);
4186 }
4187 mutex_unlock(&trace_types_lock);
4188
4189 #ifdef CONFIG_TRACER_MAX_TRACE
4190 if (iter->snapshot && iter->trace->use_max_tr)
4191 return ERR_PTR(-EBUSY);
4192 #endif
4193
4194 if (*pos != iter->pos) {
4195 iter->ent = NULL;
4196 iter->cpu = 0;
4197 iter->idx = -1;
4198
4199 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4200 for_each_tracing_cpu(cpu)
4201 tracing_iter_reset(iter, cpu);
4202 } else
4203 tracing_iter_reset(iter, cpu_file);
4204
4205 iter->leftover = 0;
4206 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4207 ;
4208
4209 } else {
4210 /*
4211 * If we overflowed the seq_file before, then we want
4212 * to just reuse the trace_seq buffer again.
4213 */
4214 if (iter->leftover)
4215 p = iter;
4216 else {
4217 l = *pos - 1;
4218 p = s_next(m, p, &l);
4219 }
4220 }
4221
4222 trace_event_read_lock();
4223 trace_access_lock(cpu_file);
4224 return p;
4225 }
4226
s_stop(struct seq_file * m,void * p)4227 static void s_stop(struct seq_file *m, void *p)
4228 {
4229 struct trace_iterator *iter = m->private;
4230
4231 #ifdef CONFIG_TRACER_MAX_TRACE
4232 if (iter->snapshot && iter->trace->use_max_tr)
4233 return;
4234 #endif
4235
4236 trace_access_unlock(iter->cpu_file);
4237 trace_event_read_unlock();
4238 }
4239
4240 static void
get_total_entries_cpu(struct array_buffer * buf,unsigned long * total,unsigned long * entries,int cpu)4241 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4242 unsigned long *entries, int cpu)
4243 {
4244 unsigned long count;
4245
4246 count = ring_buffer_entries_cpu(buf->buffer, cpu);
4247 /*
4248 * If this buffer has skipped entries, then we hold all
4249 * entries for the trace and we need to ignore the
4250 * ones before the time stamp.
4251 */
4252 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4253 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4254 /* total is the same as the entries */
4255 *total = count;
4256 } else
4257 *total = count +
4258 ring_buffer_overrun_cpu(buf->buffer, cpu);
4259 *entries = count;
4260 }
4261
4262 static void
get_total_entries(struct array_buffer * buf,unsigned long * total,unsigned long * entries)4263 get_total_entries(struct array_buffer *buf,
4264 unsigned long *total, unsigned long *entries)
4265 {
4266 unsigned long t, e;
4267 int cpu;
4268
4269 *total = 0;
4270 *entries = 0;
4271
4272 for_each_tracing_cpu(cpu) {
4273 get_total_entries_cpu(buf, &t, &e, cpu);
4274 *total += t;
4275 *entries += e;
4276 }
4277 }
4278
trace_total_entries_cpu(struct trace_array * tr,int cpu)4279 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4280 {
4281 unsigned long total, entries;
4282
4283 if (!tr)
4284 tr = &global_trace;
4285
4286 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4287
4288 return entries;
4289 }
4290
trace_total_entries(struct trace_array * tr)4291 unsigned long trace_total_entries(struct trace_array *tr)
4292 {
4293 unsigned long total, entries;
4294
4295 if (!tr)
4296 tr = &global_trace;
4297
4298 get_total_entries(&tr->array_buffer, &total, &entries);
4299
4300 return entries;
4301 }
4302
/*
 * Print the column legend used for the latency trace format.
 *
 * The legend is an ASCII diagram: each "/" on one row continues as a
 * "|" on the rows below it, ending in the "||||||" flag columns of the
 * "cmd pid ... time | caller" row. The padding inside the literals is
 * what keeps those connectors in the same column from row to row, so
 * it must not be collapsed.
 */
static void print_lat_help_header(struct seq_file *m)
{
	seq_puts(m, "#                    _------=> CPU#            \n"
		    "#                   / _-----=> irqs-off/BH-disabled\n"
		    "#                  | / _----=> need-resched    \n"
		    "#                  || / _---=> hardirq/softirq \n"
		    "#                  ||| / _--=> preempt-depth   \n"
		    "#                  |||| / _-=> migrate-disable \n"
		    "#                  ||||| /     delay           \n"
		    "#  cmd     pid     |||||| time  |   caller     \n"
		    "#     \\   /        ||||||  \\    |    /       \n");
}
4315
/*
 * Print the "entries-in-buffer/entries-written" summary line for @buf,
 * with the number of online CPUs, followed by an empty "#" line.
 */
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
{
	unsigned long written;
	unsigned long in_buffer;

	get_total_entries(buf, &written, &in_buffer);

	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
		   in_buffer, written, num_online_cpus());
	seq_puts(m, "#\n");
}
4326
print_func_help_header(struct array_buffer * buf,struct seq_file * m,unsigned int flags)4327 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4328 unsigned int flags)
4329 {
4330 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4331
4332 print_event_info(buf, m);
4333
4334 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
4335 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
4336 }
4337
print_func_help_header_irq(struct array_buffer * buf,struct seq_file * m,unsigned int flags)4338 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4339 unsigned int flags)
4340 {
4341 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4342 static const char space[] = " ";
4343 int prec = tgid ? 12 : 2;
4344
4345 print_event_info(buf, m);
4346
4347 seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space);
4348 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
4349 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
4350 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
4351 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
4352 seq_printf(m, "# %.*s|||| / delay\n", prec, space);
4353 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
4354 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
4355 }
4356
4357 void
print_trace_header(struct seq_file * m,struct trace_iterator * iter)4358 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4359 {
4360 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4361 struct array_buffer *buf = iter->array_buffer;
4362 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4363 struct tracer *type = iter->trace;
4364 unsigned long entries;
4365 unsigned long total;
4366 const char *name = type->name;
4367
4368 get_total_entries(buf, &total, &entries);
4369
4370 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4371 name, UTS_RELEASE);
4372 seq_puts(m, "# -----------------------------------"
4373 "---------------------------------\n");
4374 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4375 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4376 nsecs_to_usecs(data->saved_latency),
4377 entries,
4378 total,
4379 buf->cpu,
4380 preempt_model_none() ? "server" :
4381 preempt_model_voluntary() ? "desktop" :
4382 preempt_model_full() ? "preempt" :
4383 preempt_model_rt() ? "preempt_rt" :
4384 "unknown",
4385 /* These are reserved for later use */
4386 0, 0, 0, 0);
4387 #ifdef CONFIG_SMP
4388 seq_printf(m, " #P:%d)\n", num_online_cpus());
4389 #else
4390 seq_puts(m, ")\n");
4391 #endif
4392 seq_puts(m, "# -----------------\n");
4393 seq_printf(m, "# | task: %.16s-%d "
4394 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4395 data->comm, data->pid,
4396 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4397 data->policy, data->rt_priority);
4398 seq_puts(m, "# -----------------\n");
4399
4400 if (data->critical_start) {
4401 seq_puts(m, "# => started at: ");
4402 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4403 trace_print_seq(m, &iter->seq);
4404 seq_puts(m, "\n# => ended at: ");
4405 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4406 trace_print_seq(m, &iter->seq);
4407 seq_puts(m, "\n#\n");
4408 }
4409
4410 seq_puts(m, "#\n");
4411 }
4412
test_cpu_buff_start(struct trace_iterator * iter)4413 static void test_cpu_buff_start(struct trace_iterator *iter)
4414 {
4415 struct trace_seq *s = &iter->seq;
4416 struct trace_array *tr = iter->tr;
4417
4418 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4419 return;
4420
4421 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4422 return;
4423
4424 if (cpumask_available(iter->started) &&
4425 cpumask_test_cpu(iter->cpu, iter->started))
4426 return;
4427
4428 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4429 return;
4430
4431 if (cpumask_available(iter->started))
4432 cpumask_set_cpu(iter->cpu, iter->started);
4433
4434 /* Don't print started cpu buffer for the first entry of the trace */
4435 if (iter->idx > 1)
4436 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4437 iter->cpu);
4438 }
4439
print_trace_fmt(struct trace_iterator * iter)4440 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4441 {
4442 struct trace_array *tr = iter->tr;
4443 struct trace_seq *s = &iter->seq;
4444 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4445 struct trace_entry *entry;
4446 struct trace_event *event;
4447
4448 entry = iter->ent;
4449
4450 test_cpu_buff_start(iter);
4451
4452 event = ftrace_find_event(entry->type);
4453
4454 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4455 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4456 trace_print_lat_context(iter);
4457 else
4458 trace_print_context(iter);
4459 }
4460
4461 if (trace_seq_has_overflowed(s))
4462 return TRACE_TYPE_PARTIAL_LINE;
4463
4464 if (event) {
4465 if (tr->trace_flags & TRACE_ITER_FIELDS)
4466 return print_event_fields(iter, event);
4467 return event->funcs->trace(iter, sym_flags, event);
4468 }
4469
4470 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4471
4472 return trace_handle_return(s);
4473 }
4474
print_raw_fmt(struct trace_iterator * iter)4475 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4476 {
4477 struct trace_array *tr = iter->tr;
4478 struct trace_seq *s = &iter->seq;
4479 struct trace_entry *entry;
4480 struct trace_event *event;
4481
4482 entry = iter->ent;
4483
4484 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4485 trace_seq_printf(s, "%d %d %llu ",
4486 entry->pid, iter->cpu, iter->ts);
4487
4488 if (trace_seq_has_overflowed(s))
4489 return TRACE_TYPE_PARTIAL_LINE;
4490
4491 event = ftrace_find_event(entry->type);
4492 if (event)
4493 return event->funcs->raw(iter, 0, event);
4494
4495 trace_seq_printf(s, "%d ?\n", entry->type);
4496
4497 return trace_handle_return(s);
4498 }
4499
print_hex_fmt(struct trace_iterator * iter)4500 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4501 {
4502 struct trace_array *tr = iter->tr;
4503 struct trace_seq *s = &iter->seq;
4504 unsigned char newline = '\n';
4505 struct trace_entry *entry;
4506 struct trace_event *event;
4507
4508 entry = iter->ent;
4509
4510 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4511 SEQ_PUT_HEX_FIELD(s, entry->pid);
4512 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4513 SEQ_PUT_HEX_FIELD(s, iter->ts);
4514 if (trace_seq_has_overflowed(s))
4515 return TRACE_TYPE_PARTIAL_LINE;
4516 }
4517
4518 event = ftrace_find_event(entry->type);
4519 if (event) {
4520 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4521 if (ret != TRACE_TYPE_HANDLED)
4522 return ret;
4523 }
4524
4525 SEQ_PUT_FIELD(s, newline);
4526
4527 return trace_handle_return(s);
4528 }
4529
print_bin_fmt(struct trace_iterator * iter)4530 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4531 {
4532 struct trace_array *tr = iter->tr;
4533 struct trace_seq *s = &iter->seq;
4534 struct trace_entry *entry;
4535 struct trace_event *event;
4536
4537 entry = iter->ent;
4538
4539 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4540 SEQ_PUT_FIELD(s, entry->pid);
4541 SEQ_PUT_FIELD(s, iter->cpu);
4542 SEQ_PUT_FIELD(s, iter->ts);
4543 if (trace_seq_has_overflowed(s))
4544 return TRACE_TYPE_PARTIAL_LINE;
4545 }
4546
4547 event = ftrace_find_event(entry->type);
4548 return event ? event->funcs->binary(iter, 0, event) :
4549 TRACE_TYPE_HANDLED;
4550 }
4551
trace_empty(struct trace_iterator * iter)4552 int trace_empty(struct trace_iterator *iter)
4553 {
4554 struct ring_buffer_iter *buf_iter;
4555 int cpu;
4556
4557 /* If we are looking at one CPU buffer, only check that one */
4558 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4559 cpu = iter->cpu_file;
4560 buf_iter = trace_buffer_iter(iter, cpu);
4561 if (buf_iter) {
4562 if (!ring_buffer_iter_empty(buf_iter))
4563 return 0;
4564 } else {
4565 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4566 return 0;
4567 }
4568 return 1;
4569 }
4570
4571 for_each_tracing_cpu(cpu) {
4572 buf_iter = trace_buffer_iter(iter, cpu);
4573 if (buf_iter) {
4574 if (!ring_buffer_iter_empty(buf_iter))
4575 return 0;
4576 } else {
4577 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4578 return 0;
4579 }
4580 }
4581
4582 return 1;
4583 }
4584
4585 /* Called with trace_event_read_lock() held. */
print_trace_line(struct trace_iterator * iter)4586 enum print_line_t print_trace_line(struct trace_iterator *iter)
4587 {
4588 struct trace_array *tr = iter->tr;
4589 unsigned long trace_flags = tr->trace_flags;
4590 enum print_line_t ret;
4591
4592 if (iter->lost_events) {
4593 if (iter->lost_events == (unsigned long)-1)
4594 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4595 iter->cpu);
4596 else
4597 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4598 iter->cpu, iter->lost_events);
4599 if (trace_seq_has_overflowed(&iter->seq))
4600 return TRACE_TYPE_PARTIAL_LINE;
4601 }
4602
4603 if (iter->trace && iter->trace->print_line) {
4604 ret = iter->trace->print_line(iter);
4605 if (ret != TRACE_TYPE_UNHANDLED)
4606 return ret;
4607 }
4608
4609 if (iter->ent->type == TRACE_BPUTS &&
4610 trace_flags & TRACE_ITER_PRINTK &&
4611 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4612 return trace_print_bputs_msg_only(iter);
4613
4614 if (iter->ent->type == TRACE_BPRINT &&
4615 trace_flags & TRACE_ITER_PRINTK &&
4616 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4617 return trace_print_bprintk_msg_only(iter);
4618
4619 if (iter->ent->type == TRACE_PRINT &&
4620 trace_flags & TRACE_ITER_PRINTK &&
4621 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4622 return trace_print_printk_msg_only(iter);
4623
4624 if (trace_flags & TRACE_ITER_BIN)
4625 return print_bin_fmt(iter);
4626
4627 if (trace_flags & TRACE_ITER_HEX)
4628 return print_hex_fmt(iter);
4629
4630 if (trace_flags & TRACE_ITER_RAW)
4631 return print_raw_fmt(iter);
4632
4633 return print_trace_fmt(iter);
4634 }
4635
trace_latency_header(struct seq_file * m)4636 void trace_latency_header(struct seq_file *m)
4637 {
4638 struct trace_iterator *iter = m->private;
4639 struct trace_array *tr = iter->tr;
4640
4641 /* print nothing if the buffers are empty */
4642 if (trace_empty(iter))
4643 return;
4644
4645 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4646 print_trace_header(m, iter);
4647
4648 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4649 print_lat_help_header(m);
4650 }
4651
trace_default_header(struct seq_file * m)4652 void trace_default_header(struct seq_file *m)
4653 {
4654 struct trace_iterator *iter = m->private;
4655 struct trace_array *tr = iter->tr;
4656 unsigned long trace_flags = tr->trace_flags;
4657
4658 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4659 return;
4660
4661 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4662 /* print nothing if the buffers are empty */
4663 if (trace_empty(iter))
4664 return;
4665 print_trace_header(m, iter);
4666 if (!(trace_flags & TRACE_ITER_VERBOSE))
4667 print_lat_help_header(m);
4668 } else {
4669 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4670 if (trace_flags & TRACE_ITER_IRQ_INFO)
4671 print_func_help_header_irq(iter->array_buffer,
4672 m, trace_flags);
4673 else
4674 print_func_help_header(iter->array_buffer, m,
4675 trace_flags);
4676 }
4677 }
4678 }
4679
/* Print a warning into @m when ftrace_is_dead() reports true. */
static void test_ftrace_alive(struct seq_file *m)
{
	if (ftrace_is_dead())
		seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
			    "# MAY BE MISSING FUNCTION EVENTS\n");
}
4687
4688 #ifdef CONFIG_TRACER_MAX_TRACE
/* Print the usage text for the top-level (all CPUs) snapshot file. */
static void show_snapshot_main_help(struct seq_file *m)
{
	static const char help[] =
		"# echo 0 > snapshot : Clears and frees snapshot buffer\n"
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		"# Takes a snapshot of the main buffer.\n"
		"# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		"# (Doesn't have to be '2' works with any number that\n"
		"# is not a '0' or '1')\n";

	seq_puts(m, help);
}
4698
/*
 * Print the usage text for a per-CPU snapshot file. The "echo 1" line
 * depends on whether CONFIG_RING_BUFFER_ALLOW_SWAP is enabled.
 */
static void show_snapshot_percpu_help(struct seq_file *m)
{
	static const char clear_help[] =
		"# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		"# (Doesn't have to be '2' works with any number that\n"
		"# is not a '0' or '1')\n";

	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");

#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		    "# Takes a snapshot of the main buffer for this cpu.\n");
#else
	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
		    "# Must use main snapshot file to allocate.\n");
#endif

	seq_puts(m, clear_help);
}
4713
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)4714 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4715 {
4716 if (iter->tr->allocated_snapshot)
4717 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4718 else
4719 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4720
4721 seq_puts(m, "# Snapshot commands:\n");
4722 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4723 show_snapshot_main_help(m);
4724 else
4725 show_snapshot_percpu_help(m);
4726 }
4727 #else
/* Empty stand-in for this configuration; should never be called. */
static inline void
print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
{
}
4730 #endif
4731
s_show(struct seq_file * m,void * v)4732 static int s_show(struct seq_file *m, void *v)
4733 {
4734 struct trace_iterator *iter = v;
4735 int ret;
4736
4737 if (iter->ent == NULL) {
4738 if (iter->tr) {
4739 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4740 seq_puts(m, "#\n");
4741 test_ftrace_alive(m);
4742 }
4743 if (iter->snapshot && trace_empty(iter))
4744 print_snapshot_help(m, iter);
4745 else if (iter->trace && iter->trace->print_header)
4746 iter->trace->print_header(m);
4747 else
4748 trace_default_header(m);
4749
4750 } else if (iter->leftover) {
4751 /*
4752 * If we filled the seq_file buffer earlier, we
4753 * want to just show it now.
4754 */
4755 ret = trace_print_seq(m, &iter->seq);
4756
4757 /* ret should this time be zero, but you never know */
4758 iter->leftover = ret;
4759
4760 } else {
4761 ret = print_trace_line(iter);
4762 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4763 iter->seq.full = 0;
4764 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4765 }
4766 ret = trace_print_seq(m, &iter->seq);
4767 /*
4768 * If we overflow the seq_file buffer, then it will
4769 * ask us for this data again at start up.
4770 * Use that instead.
4771 * ret is 0 if seq_file write succeeded.
4772 * -1 otherwise.
4773 */
4774 iter->leftover = ret;
4775 }
4776
4777 return 0;
4778 }
4779
4780 /*
4781 * Should be used after trace_array_get(), trace_types_lock
4782 * ensures that i_cdev was already initialized.
4783 */
tracing_get_cpu(struct inode * inode)4784 static inline int tracing_get_cpu(struct inode *inode)
4785 {
4786 if (inode->i_cdev) /* See trace_create_cpu_file() */
4787 return (long)inode->i_cdev - 1;
4788 return RING_BUFFER_ALL_CPUS;
4789 }
4790
4791 static const struct seq_operations tracer_seq_ops = {
4792 .start = s_start,
4793 .next = s_next,
4794 .stop = s_stop,
4795 .show = s_show,
4796 };
4797
4798 /*
4799 * Note, as iter itself can be allocated and freed in different
4800 * ways, this function is only used to free its content, and not
4801 * the iterator itself. The only requirement to all the allocations
4802 * is that it must zero all fields (kzalloc), as freeing works with
4803 * ethier allocated content or NULL.
4804 */
free_trace_iter_content(struct trace_iterator * iter)4805 static void free_trace_iter_content(struct trace_iterator *iter)
4806 {
4807 /* The fmt is either NULL, allocated or points to static_fmt_buf */
4808 if (iter->fmt != static_fmt_buf)
4809 kfree(iter->fmt);
4810
4811 kfree(iter->temp);
4812 kfree(iter->buffer_iter);
4813 mutex_destroy(&iter->mutex);
4814 free_cpumask_var(iter->started);
4815 }
4816
4817 static struct trace_iterator *
__tracing_open(struct inode * inode,struct file * file,bool snapshot)4818 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4819 {
4820 struct trace_array *tr = inode->i_private;
4821 struct trace_iterator *iter;
4822 int cpu;
4823
4824 if (tracing_disabled)
4825 return ERR_PTR(-ENODEV);
4826
4827 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4828 if (!iter)
4829 return ERR_PTR(-ENOMEM);
4830
4831 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4832 GFP_KERNEL);
4833 if (!iter->buffer_iter)
4834 goto release;
4835
4836 /*
4837 * trace_find_next_entry() may need to save off iter->ent.
4838 * It will place it into the iter->temp buffer. As most
4839 * events are less than 128, allocate a buffer of that size.
4840 * If one is greater, then trace_find_next_entry() will
4841 * allocate a new buffer to adjust for the bigger iter->ent.
4842 * It's not critical if it fails to get allocated here.
4843 */
4844 iter->temp = kmalloc(128, GFP_KERNEL);
4845 if (iter->temp)
4846 iter->temp_size = 128;
4847
4848 /*
4849 * trace_event_printf() may need to modify given format
4850 * string to replace %p with %px so that it shows real address
4851 * instead of hash value. However, that is only for the event
4852 * tracing, other tracer may not need. Defer the allocation
4853 * until it is needed.
4854 */
4855 iter->fmt = NULL;
4856 iter->fmt_size = 0;
4857
4858 mutex_lock(&trace_types_lock);
4859 iter->trace = tr->current_trace;
4860
4861 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4862 goto fail;
4863
4864 iter->tr = tr;
4865
4866 #ifdef CONFIG_TRACER_MAX_TRACE
4867 /* Currently only the top directory has a snapshot */
4868 if (tr->current_trace->print_max || snapshot)
4869 iter->array_buffer = &tr->max_buffer;
4870 else
4871 #endif
4872 iter->array_buffer = &tr->array_buffer;
4873 iter->snapshot = snapshot;
4874 iter->pos = -1;
4875 iter->cpu_file = tracing_get_cpu(inode);
4876 mutex_init(&iter->mutex);
4877
4878 /* Notify the tracer early; before we stop tracing. */
4879 if (iter->trace->open)
4880 iter->trace->open(iter);
4881
4882 /* Annotate start of buffers if we had overruns */
4883 if (ring_buffer_overruns(iter->array_buffer->buffer))
4884 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4885
4886 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4887 if (trace_clocks[tr->clock_id].in_ns)
4888 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4889
4890 /*
4891 * If pause-on-trace is enabled, then stop the trace while
4892 * dumping, unless this is the "snapshot" file
4893 */
4894 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4895 tracing_stop_tr(tr);
4896
4897 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4898 for_each_tracing_cpu(cpu) {
4899 iter->buffer_iter[cpu] =
4900 ring_buffer_read_prepare(iter->array_buffer->buffer,
4901 cpu, GFP_KERNEL);
4902 }
4903 ring_buffer_read_prepare_sync();
4904 for_each_tracing_cpu(cpu) {
4905 ring_buffer_read_start(iter->buffer_iter[cpu]);
4906 tracing_iter_reset(iter, cpu);
4907 }
4908 } else {
4909 cpu = iter->cpu_file;
4910 iter->buffer_iter[cpu] =
4911 ring_buffer_read_prepare(iter->array_buffer->buffer,
4912 cpu, GFP_KERNEL);
4913 ring_buffer_read_prepare_sync();
4914 ring_buffer_read_start(iter->buffer_iter[cpu]);
4915 tracing_iter_reset(iter, cpu);
4916 }
4917
4918 mutex_unlock(&trace_types_lock);
4919
4920 return iter;
4921
4922 fail:
4923 mutex_unlock(&trace_types_lock);
4924 free_trace_iter_content(iter);
4925 release:
4926 seq_release_private(inode, file);
4927 return ERR_PTR(-ENOMEM);
4928 }
4929
tracing_open_generic(struct inode * inode,struct file * filp)4930 int tracing_open_generic(struct inode *inode, struct file *filp)
4931 {
4932 int ret;
4933
4934 ret = tracing_check_open_get_tr(NULL);
4935 if (ret)
4936 return ret;
4937
4938 filp->private_data = inode->i_private;
4939 return 0;
4940 }
4941
tracing_is_disabled(void)4942 bool tracing_is_disabled(void)
4943 {
4944 return (tracing_disabled) ? true: false;
4945 }
4946
4947 /*
4948 * Open and update trace_array ref count.
4949 * Must have the current trace_array passed to it.
4950 */
tracing_open_generic_tr(struct inode * inode,struct file * filp)4951 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4952 {
4953 struct trace_array *tr = inode->i_private;
4954 int ret;
4955
4956 ret = tracing_check_open_get_tr(tr);
4957 if (ret)
4958 return ret;
4959
4960 filp->private_data = inode->i_private;
4961
4962 return 0;
4963 }
4964
4965 /*
4966 * The private pointer of the inode is the trace_event_file.
4967 * Update the tr ref count associated to it.
4968 */
tracing_open_file_tr(struct inode * inode,struct file * filp)4969 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4970 {
4971 struct trace_event_file *file = inode->i_private;
4972 int ret;
4973
4974 ret = tracing_check_open_get_tr(file->tr);
4975 if (ret)
4976 return ret;
4977
4978 mutex_lock(&event_mutex);
4979
4980 /* Fail if the file is marked for removal */
4981 if (file->flags & EVENT_FILE_FL_FREED) {
4982 trace_array_put(file->tr);
4983 ret = -ENODEV;
4984 } else {
4985 event_file_get(file);
4986 }
4987
4988 mutex_unlock(&event_mutex);
4989 if (ret)
4990 return ret;
4991
4992 filp->private_data = inode->i_private;
4993
4994 return 0;
4995 }
4996
tracing_release_file_tr(struct inode * inode,struct file * filp)4997 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4998 {
4999 struct trace_event_file *file = inode->i_private;
5000
5001 trace_array_put(file->tr);
5002 event_file_put(file);
5003
5004 return 0;
5005 }
5006
/*
 * Drop the references via tracing_release_file_tr(), then finish with
 * single_release() and return its result.
 */
int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
{
	tracing_release_file_tr(inode, filp);

	return single_release(inode, filp);
}
5012
/* Call stream_open() on the file, then do the generic trace_array open. */
static int tracing_mark_open(struct inode *inode, struct file *filp)
{
	stream_open(inode, filp);

	return tracing_open_generic_tr(inode, filp);
}
5018
tracing_release(struct inode * inode,struct file * file)5019 static int tracing_release(struct inode *inode, struct file *file)
5020 {
5021 struct trace_array *tr = inode->i_private;
5022 struct seq_file *m = file->private_data;
5023 struct trace_iterator *iter;
5024 int cpu;
5025
5026 if (!(file->f_mode & FMODE_READ)) {
5027 trace_array_put(tr);
5028 return 0;
5029 }
5030
5031 /* Writes do not use seq_file */
5032 iter = m->private;
5033 mutex_lock(&trace_types_lock);
5034
5035 for_each_tracing_cpu(cpu) {
5036 if (iter->buffer_iter[cpu])
5037 ring_buffer_read_finish(iter->buffer_iter[cpu]);
5038 }
5039
5040 if (iter->trace && iter->trace->close)
5041 iter->trace->close(iter);
5042
5043 if (!iter->snapshot && tr->stop_count)
5044 /* reenable tracing if it was previously enabled */
5045 tracing_start_tr(tr);
5046
5047 __trace_array_put(tr);
5048
5049 mutex_unlock(&trace_types_lock);
5050
5051 free_trace_iter_content(iter);
5052 seq_release_private(inode, file);
5053
5054 return 0;
5055 }
5056
tracing_release_generic_tr(struct inode * inode,struct file * file)5057 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
5058 {
5059 struct trace_array *tr = inode->i_private;
5060
5061 trace_array_put(tr);
5062 return 0;
5063 }
5064
tracing_single_release_tr(struct inode * inode,struct file * file)5065 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5066 {
5067 struct trace_array *tr = inode->i_private;
5068
5069 trace_array_put(tr);
5070
5071 return single_release(inode, file);
5072 }
5073
tracing_open(struct inode * inode,struct file * file)5074 static int tracing_open(struct inode *inode, struct file *file)
5075 {
5076 struct trace_array *tr = inode->i_private;
5077 struct trace_iterator *iter;
5078 int ret;
5079
5080 ret = tracing_check_open_get_tr(tr);
5081 if (ret)
5082 return ret;
5083
5084 /* If this file was open for write, then erase contents */
5085 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5086 int cpu = tracing_get_cpu(inode);
5087 struct array_buffer *trace_buf = &tr->array_buffer;
5088
5089 #ifdef CONFIG_TRACER_MAX_TRACE
5090 if (tr->current_trace->print_max)
5091 trace_buf = &tr->max_buffer;
5092 #endif
5093
5094 if (cpu == RING_BUFFER_ALL_CPUS)
5095 tracing_reset_online_cpus(trace_buf);
5096 else
5097 tracing_reset_cpu(trace_buf, cpu);
5098 }
5099
5100 if (file->f_mode & FMODE_READ) {
5101 iter = __tracing_open(inode, file, false);
5102 if (IS_ERR(iter))
5103 ret = PTR_ERR(iter);
5104 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5105 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5106 }
5107
5108 if (ret < 0)
5109 trace_array_put(tr);
5110
5111 return ret;
5112 }
5113
5114 /*
5115 * Some tracers are not suitable for instance buffers.
5116 * A tracer is always available for the global array (toplevel)
5117 * or if it explicitly states that it is.
5118 */
5119 static bool
trace_ok_for_array(struct tracer * t,struct trace_array * tr)5120 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5121 {
5122 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5123 }
5124
5125 /* Find the next tracer that this trace array may use */
5126 static struct tracer *
get_tracer_for_array(struct trace_array * tr,struct tracer * t)5127 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5128 {
5129 while (t && !trace_ok_for_array(t, tr))
5130 t = t->next;
5131
5132 return t;
5133 }
5134
5135 static void *
t_next(struct seq_file * m,void * v,loff_t * pos)5136 t_next(struct seq_file *m, void *v, loff_t *pos)
5137 {
5138 struct trace_array *tr = m->private;
5139 struct tracer *t = v;
5140
5141 (*pos)++;
5142
5143 if (t)
5144 t = get_tracer_for_array(tr, t->next);
5145
5146 return t;
5147 }
5148
/*
 * seq_file ->start: take trace_types_lock (released in t_stop()) and
 * return the usable tracer at index *pos, or NULL when *pos is past the
 * end of the list.
 */
static void *t_start(struct seq_file *m, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct tracer *t;
	loff_t idx = 0;

	mutex_lock(&trace_types_lock);

	t = get_tracer_for_array(tr, trace_types);
	while (t && idx < *pos)
		t = t_next(m, t, &idx);

	return t;
}
5163
/* seq_file ->stop: release trace_types_lock, which t_start() acquired. */
static void t_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&trace_types_lock);
}
5168
t_show(struct seq_file * m,void * v)5169 static int t_show(struct seq_file *m, void *v)
5170 {
5171 struct tracer *t = v;
5172
5173 if (!t)
5174 return 0;
5175
5176 seq_puts(m, t->name);
5177 if (t->next)
5178 seq_putc(m, ' ');
5179 else
5180 seq_putc(m, '\n');
5181
5182 return 0;
5183 }
5184
5185 static const struct seq_operations show_traces_seq_ops = {
5186 .start = t_start,
5187 .next = t_next,
5188 .stop = t_stop,
5189 .show = t_show,
5190 };
5191
show_traces_open(struct inode * inode,struct file * file)5192 static int show_traces_open(struct inode *inode, struct file *file)
5193 {
5194 struct trace_array *tr = inode->i_private;
5195 struct seq_file *m;
5196 int ret;
5197
5198 ret = tracing_check_open_get_tr(tr);
5199 if (ret)
5200 return ret;
5201
5202 ret = seq_open(file, &show_traces_seq_ops);
5203 if (ret) {
5204 trace_array_put(tr);
5205 return ret;
5206 }
5207
5208 m = file->private_data;
5209 m->private = tr;
5210
5211 return 0;
5212 }
5213
show_traces_release(struct inode * inode,struct file * file)5214 static int show_traces_release(struct inode *inode, struct file *file)
5215 {
5216 struct trace_array *tr = inode->i_private;
5217
5218 trace_array_put(tr);
5219 return seq_release(inode, file);
5220 }
5221
/*
 * Write handler that discards its input: nothing is read from @ubuf and
 * the full @count is reported back as written.
 */
static ssize_t
tracing_write_stub(struct file *filp, const char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	return count;
}
5228
/*
 * llseek handler for tracing files.
 *
 * Files opened for reading seek through seq_lseek().  Files not opened
 * for reading have their position reset to 0 and report 0.
 *
 * The result is kept in a loff_t: storing seq_lseek()'s return value in
 * an int would truncate large offsets before they are returned.
 */
loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
{
	loff_t ret;

	if (file->f_mode & FMODE_READ) {
		ret = seq_lseek(file, offset, whence);
	} else {
		ret = 0;
		file->f_pos = 0;
	}

	return ret;
}
5240
5241 static const struct file_operations tracing_fops = {
5242 .open = tracing_open,
5243 .read = seq_read,
5244 .read_iter = seq_read_iter,
5245 .splice_read = copy_splice_read,
5246 .write = tracing_write_stub,
5247 .llseek = tracing_lseek,
5248 .release = tracing_release,
5249 };
5250
5251 static const struct file_operations show_traces_fops = {
5252 .open = show_traces_open,
5253 .read = seq_read,
5254 .llseek = seq_lseek,
5255 .release = show_traces_release,
5256 };
5257
5258 static ssize_t
tracing_cpumask_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)5259 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5260 size_t count, loff_t *ppos)
5261 {
5262 struct trace_array *tr = file_inode(filp)->i_private;
5263 char *mask_str;
5264 int len;
5265
5266 len = snprintf(NULL, 0, "%*pb\n",
5267 cpumask_pr_args(tr->tracing_cpumask)) + 1;
5268 mask_str = kmalloc(len, GFP_KERNEL);
5269 if (!mask_str)
5270 return -ENOMEM;
5271
5272 len = snprintf(mask_str, len, "%*pb\n",
5273 cpumask_pr_args(tr->tracing_cpumask));
5274 if (len >= count) {
5275 count = -EINVAL;
5276 goto out_err;
5277 }
5278 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5279
5280 out_err:
5281 kfree(mask_str);
5282
5283 return count;
5284 }
5285
/*
 * Install @tracing_cpumask_new as the tracing cpumask of @tr.
 *
 * For every tracing CPU whose bit differs between the old and new mask,
 * the per-CPU "disabled" counter is adjusted and ring buffer recording is
 * switched off or on for that CPU (for the max buffer too when
 * CONFIG_TRACER_MAX_TRACE is set).  This runs with interrupts disabled
 * and tr->max_lock held; the new mask is copied into place afterwards.
 *
 * Returns 0 on success or -EINVAL when @tr is NULL.
 */
int tracing_set_cpumask(struct trace_array *tr,
			cpumask_var_t tracing_cpumask_new)
{
	int cpu;

	if (!tr)
		return -EINVAL;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	for_each_tracing_cpu(cpu) {
		bool was_set = cpumask_test_cpu(cpu, tr->tracing_cpumask);
		bool will_be_set = cpumask_test_cpu(cpu, tracing_cpumask_new);

		/* Only CPUs whose bit is about to flip need any work. */
		if (was_set == will_be_set)
			continue;

		if (was_set) {
			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_MAX_TRACE
			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
#endif
		} else {
			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_MAX_TRACE
			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
#endif
		}
	}
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);

	return 0;
}
5325
5326 static ssize_t
tracing_cpumask_write(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)5327 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5328 size_t count, loff_t *ppos)
5329 {
5330 struct trace_array *tr = file_inode(filp)->i_private;
5331 cpumask_var_t tracing_cpumask_new;
5332 int err;
5333
5334 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5335 return -ENOMEM;
5336
5337 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5338 if (err)
5339 goto err_free;
5340
5341 err = tracing_set_cpumask(tr, tracing_cpumask_new);
5342 if (err)
5343 goto err_free;
5344
5345 free_cpumask_var(tracing_cpumask_new);
5346
5347 return count;
5348
5349 err_free:
5350 free_cpumask_var(tracing_cpumask_new);
5351
5352 return err;
5353 }
5354
5355 static const struct file_operations tracing_cpumask_fops = {
5356 .open = tracing_open_generic_tr,
5357 .read = tracing_cpumask_read,
5358 .write = tracing_cpumask_write,
5359 .release = tracing_release_generic_tr,
5360 .llseek = generic_file_llseek,
5361 };
5362
tracing_trace_options_show(struct seq_file * m,void * v)5363 static int tracing_trace_options_show(struct seq_file *m, void *v)
5364 {
5365 struct tracer_opt *trace_opts;
5366 struct trace_array *tr = m->private;
5367 u32 tracer_flags;
5368 int i;
5369
5370 mutex_lock(&trace_types_lock);
5371 tracer_flags = tr->current_trace->flags->val;
5372 trace_opts = tr->current_trace->flags->opts;
5373
5374 for (i = 0; trace_options[i]; i++) {
5375 if (tr->trace_flags & (1 << i))
5376 seq_printf(m, "%s\n", trace_options[i]);
5377 else
5378 seq_printf(m, "no%s\n", trace_options[i]);
5379 }
5380
5381 for (i = 0; trace_opts[i].name; i++) {
5382 if (tracer_flags & trace_opts[i].bit)
5383 seq_printf(m, "%s\n", trace_opts[i].name);
5384 else
5385 seq_printf(m, "no%s\n", trace_opts[i].name);
5386 }
5387 mutex_unlock(&trace_types_lock);
5388
5389 return 0;
5390 }
5391
__set_tracer_option(struct trace_array * tr,struct tracer_flags * tracer_flags,struct tracer_opt * opts,int neg)5392 static int __set_tracer_option(struct trace_array *tr,
5393 struct tracer_flags *tracer_flags,
5394 struct tracer_opt *opts, int neg)
5395 {
5396 struct tracer *trace = tracer_flags->trace;
5397 int ret;
5398
5399 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5400 if (ret)
5401 return ret;
5402
5403 if (neg)
5404 tracer_flags->val &= ~opts->bit;
5405 else
5406 tracer_flags->val |= opts->bit;
5407 return 0;
5408 }
5409
5410 /* Try to assign a tracer specific option */
set_tracer_option(struct trace_array * tr,char * cmp,int neg)5411 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5412 {
5413 struct tracer *trace = tr->current_trace;
5414 struct tracer_flags *tracer_flags = trace->flags;
5415 struct tracer_opt *opts = NULL;
5416 int i;
5417
5418 for (i = 0; tracer_flags->opts[i].name; i++) {
5419 opts = &tracer_flags->opts[i];
5420
5421 if (strcmp(cmp, opts->name) == 0)
5422 return __set_tracer_option(tr, trace->flags, opts, neg);
5423 }
5424
5425 return -EINVAL;
5426 }
5427
5428 /* Some tracers require overwrite to stay enabled */
trace_keep_overwrite(struct tracer * tracer,u32 mask,int set)5429 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5430 {
5431 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5432 return -1;
5433
5434 return 0;
5435 }
5436
set_tracer_flag(struct trace_array * tr,unsigned int mask,int enabled)5437 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5438 {
5439 int *map;
5440
5441 if ((mask == TRACE_ITER_RECORD_TGID) ||
5442 (mask == TRACE_ITER_RECORD_CMD))
5443 lockdep_assert_held(&event_mutex);
5444
5445 /* do nothing if flag is already set */
5446 if (!!(tr->trace_flags & mask) == !!enabled)
5447 return 0;
5448
5449 /* Give the tracer a chance to approve the change */
5450 if (tr->current_trace->flag_changed)
5451 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5452 return -EINVAL;
5453
5454 if (enabled)
5455 tr->trace_flags |= mask;
5456 else
5457 tr->trace_flags &= ~mask;
5458
5459 if (mask == TRACE_ITER_RECORD_CMD)
5460 trace_event_enable_cmd_record(enabled);
5461
5462 if (mask == TRACE_ITER_RECORD_TGID) {
5463 if (!tgid_map) {
5464 tgid_map_max = pid_max;
5465 map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5466 GFP_KERNEL);
5467
5468 /*
5469 * Pairs with smp_load_acquire() in
5470 * trace_find_tgid_ptr() to ensure that if it observes
5471 * the tgid_map we just allocated then it also observes
5472 * the corresponding tgid_map_max value.
5473 */
5474 smp_store_release(&tgid_map, map);
5475 }
5476 if (!tgid_map) {
5477 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5478 return -ENOMEM;
5479 }
5480
5481 trace_event_enable_tgid_record(enabled);
5482 }
5483
5484 if (mask == TRACE_ITER_EVENT_FORK)
5485 trace_event_follow_fork(tr, enabled);
5486
5487 if (mask == TRACE_ITER_FUNC_FORK)
5488 ftrace_pid_follow_fork(tr, enabled);
5489
5490 if (mask == TRACE_ITER_OVERWRITE) {
5491 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5492 #ifdef CONFIG_TRACER_MAX_TRACE
5493 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5494 #endif
5495 }
5496
5497 if (mask == TRACE_ITER_PRINTK) {
5498 trace_printk_start_stop_comm(enabled);
5499 trace_printk_control(enabled);
5500 }
5501
5502 return 0;
5503 }
5504
trace_set_options(struct trace_array * tr,char * option)5505 int trace_set_options(struct trace_array *tr, char *option)
5506 {
5507 char *cmp;
5508 int neg = 0;
5509 int ret;
5510 size_t orig_len = strlen(option);
5511 int len;
5512
5513 cmp = strstrip(option);
5514
5515 len = str_has_prefix(cmp, "no");
5516 if (len)
5517 neg = 1;
5518
5519 cmp += len;
5520
5521 mutex_lock(&event_mutex);
5522 mutex_lock(&trace_types_lock);
5523
5524 ret = match_string(trace_options, -1, cmp);
5525 /* If no option could be set, test the specific tracer options */
5526 if (ret < 0)
5527 ret = set_tracer_option(tr, cmp, neg);
5528 else
5529 ret = set_tracer_flag(tr, 1 << ret, !neg);
5530
5531 mutex_unlock(&trace_types_lock);
5532 mutex_unlock(&event_mutex);
5533
5534 /*
5535 * If the first trailing whitespace is replaced with '\0' by strstrip,
5536 * turn it back into a space.
5537 */
5538 if (orig_len > strlen(option))
5539 option[strlen(option)] = ' ';
5540
5541 return ret;
5542 }
5543
apply_trace_boot_options(void)5544 static void __init apply_trace_boot_options(void)
5545 {
5546 char *buf = trace_boot_options_buf;
5547 char *option;
5548
5549 while (true) {
5550 option = strsep(&buf, ",");
5551
5552 if (!option)
5553 break;
5554
5555 if (*option)
5556 trace_set_options(&global_trace, option);
5557
5558 /* Put back the comma to allow this to be called again */
5559 if (buf)
5560 *(buf - 1) = ',';
5561 }
5562 }
5563
5564 static ssize_t
tracing_trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5565 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5566 size_t cnt, loff_t *ppos)
5567 {
5568 struct seq_file *m = filp->private_data;
5569 struct trace_array *tr = m->private;
5570 char buf[64];
5571 int ret;
5572
5573 if (cnt >= sizeof(buf))
5574 return -EINVAL;
5575
5576 if (copy_from_user(buf, ubuf, cnt))
5577 return -EFAULT;
5578
5579 buf[cnt] = 0;
5580
5581 ret = trace_set_options(tr, buf);
5582 if (ret < 0)
5583 return ret;
5584
5585 *ppos += cnt;
5586
5587 return cnt;
5588 }
5589
tracing_trace_options_open(struct inode * inode,struct file * file)5590 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5591 {
5592 struct trace_array *tr = inode->i_private;
5593 int ret;
5594
5595 ret = tracing_check_open_get_tr(tr);
5596 if (ret)
5597 return ret;
5598
5599 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5600 if (ret < 0)
5601 trace_array_put(tr);
5602
5603 return ret;
5604 }
5605
5606 static const struct file_operations tracing_iter_fops = {
5607 .open = tracing_trace_options_open,
5608 .read = seq_read,
5609 .llseek = seq_lseek,
5610 .release = tracing_single_release_tr,
5611 .write = tracing_trace_options_write,
5612 };
5613
/*
 * Text returned by tracing_readme_read(): a short HOWTO describing the
 * tracing control files.  Sections are compiled in or out to match the
 * kernel configuration.
 */
static const char readme_msg[] =
	"tracing mini-HOWTO:\n\n"
	"# echo 0 > tracing_on : quick way to disable tracing\n"
	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
	" Important files:\n"
	" trace\t\t\t- The static contents of the buffer\n"
	"\t\t\t To clear the buffer write into this file: echo > trace\n"
	" trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
	" current_tracer\t- function and latency tracers\n"
	" available_tracers\t- list of configured tracers for current_tracer\n"
	" error_log\t- error log for failed commands (that support it)\n"
	" buffer_size_kb\t- view and modify size of per cpu buffer\n"
	" buffer_total_size_kb - view total size of all cpu buffers\n\n"
	" trace_clock\t\t- change the clock used to order events\n"
	" local: Per cpu clock but may not be synced across CPUs\n"
	" global: Synced across CPUs but slows tracing down.\n"
	" counter: Not a clock, but just an increment\n"
	" uptime: Jiffy counter from time of boot\n"
	" perf: Same clock that perf events use\n"
#ifdef CONFIG_X86_64
	" x86-tsc: TSC cycle counter\n"
#endif
	"\n timestamp_mode\t- view the mode used to timestamp events\n"
	" delta: Delta difference against a buffer-wide timestamp\n"
	" absolute: Absolute (standalone) timestamp\n"
	"\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
	"\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
	" tracing_cpumask\t- Limit which CPUs to trace\n"
	" instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
	"\t\t\t Remove sub-buffer with rmdir\n"
	" trace_options\t\t- Set format or modify how tracing happens\n"
	"\t\t\t Disable an option by prefixing 'no' to the\n"
	"\t\t\t option name\n"
	" saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"\n available_filter_functions - list of functions that can be filtered on\n"
	" set_ftrace_filter\t- echo function name in here to only trace these\n"
	"\t\t\t functions\n"
	"\t accepts: func_full_name or glob-matching-pattern\n"
	"\t modules: Can select a group via module\n"
	"\t Format: :mod:<module-name>\n"
	"\t example: echo :mod:ext3 > set_ftrace_filter\n"
	"\t triggers: a command to perform when function is hit\n"
	"\t Format: <function>:<trigger>[:count]\n"
	"\t trigger: traceon, traceoff\n"
	"\t\t enable_event:<system>:<event>\n"
	"\t\t disable_event:<system>:<event>\n"
#ifdef CONFIG_STACKTRACE
	"\t\t stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t snapshot\n"
#endif
	"\t\t dump\n"
	"\t\t cpudump\n"
	"\t example: echo do_fault:traceoff > set_ftrace_filter\n"
	"\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
	"\t The first one will disable tracing every time do_fault is hit\n"
	"\t The second will disable tracing at most 3 times when do_trap is hit\n"
	"\t The first time do_trap is hit and it disables tracing, the\n"
	"\t counter will decrement to 2. If tracing is already disabled,\n"
	"\t the counter will not decrement. It only decrements when the\n"
	"\t trigger did work\n"
	"\t To remove trigger without count:\n"
	"\t echo '!<function>:<trigger>' > set_ftrace_filter\n"
	"\t To remove trigger with a count:\n"
	"\t echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
	" set_ftrace_notrace\t- echo function name in here to never trace.\n"
	"\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
	"\t modules: Can select a group via module command :mod:\n"
	"\t Does not accept triggers\n"
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_TRACER
	" set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
	"\t\t (function)\n"
	" set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
	"\t\t (function)\n"
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	" set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
	" set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
	" max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
	"\t\t\t snapshot buffer. Read the contents for more\n"
	"\t\t\t information\n"
#endif
#ifdef CONFIG_STACK_TRACER
	" stack_trace\t\t- Shows the max stack trace when active\n"
	" stack_max_size\t- Shows current max stack size that was traced\n"
	"\t\t\t Write into this file to reset the max size (trigger a\n"
	"\t\t\t new trace)\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	" stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
	"\t\t\t traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
#ifdef CONFIG_DYNAMIC_EVENTS
	" dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
	"\t\t\t Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_KPROBE_EVENTS
	" kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
	"\t\t\t Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
	" uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
	"\t\t\t Write into this file to define/undefine new trace events.\n"
#endif
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
    defined(CONFIG_FPROBE_EVENTS)
	"\t accepts: event-definitions (one definition per line)\n"
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
	"\t Format: p[:[<group>/][<event>]] <place> [<args>]\n"
	"\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
#endif
#ifdef CONFIG_FPROBE_EVENTS
	"\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
	"\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
	"\t s:[synthetic/]<event> <field> [<field>]\n"
#endif
	"\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
	"\t -:[<group>/][<event>]\n"
#ifdef CONFIG_KPROBE_EVENTS
	"\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
	"place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
	" place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
#endif
	"\t args: <name>=fetcharg[:type]\n"
	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
#ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
	"\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
	"\t <argname>[->field[->field|.field...]],\n"
#else
	"\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
#endif
#else
	"\t $stack<index>, $stack, $retval, $comm,\n"
#endif
	"\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
	"\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
	"\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
	"\t symstr, <type>\\[<array-size>\\]\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t field: <stype> <name>;\n"
	"\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
	"\t [unsigned] char/int/long\n"
#endif
	"\t efield: For event probes ('e' types), the field is one of the fields\n"
	"\t of the <attached-group>/<attached-event>.\n"
#endif
	" events/\t\t- Directory containing all trace event subsystems:\n"
	" enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
	" events/<system>/\t- Directory containing all trace events for <system>:\n"
	" enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
	"\t\t\t events\n"
	" filter\t\t- If set, only events passing filter are traced\n"
	" events/<system>/<event>/\t- Directory containing control files for\n"
	"\t\t\t <event>:\n"
	" enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
	" filter\t\t- If set, only events passing filter are traced\n"
	" trigger\t\t- If set, a command to perform when event is hit\n"
	"\t Format: <trigger>[:count][if <filter>]\n"
	"\t trigger: traceon, traceoff\n"
	"\t enable_event:<system>:<event>\n"
	"\t disable_event:<system>:<event>\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t enable_hist:<system>:<event>\n"
	"\t disable_hist:<system>:<event>\n"
#endif
#ifdef CONFIG_STACKTRACE
	"\t\t stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t snapshot\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
	"\t\t hist (see below)\n"
#endif
	"\t example: echo traceoff > events/block/block_unplug/trigger\n"
	"\t echo traceoff:3 > events/block/block_unplug/trigger\n"
	"\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
	"\t events/block/block_unplug/trigger\n"
	"\t The first disables tracing every time block_unplug is hit.\n"
	"\t The second disables tracing the first 3 times block_unplug is hit.\n"
	"\t The third enables the kmalloc event the first 3 times block_unplug\n"
	"\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
	"\t Like function triggers, the counter is only decremented if it\n"
	"\t enabled or disabled tracing.\n"
	"\t To remove a trigger without a count:\n"
	"\t echo '!<trigger>' > <system>/<event>/trigger\n"
	"\t To remove a trigger with a count:\n"
	"\t echo '!<trigger>:0' > <system>/<event>/trigger\n"
	"\t Filters can be ignored when removing a trigger.\n"
#ifdef CONFIG_HIST_TRIGGERS
	" hist trigger\t- If set, event hits are aggregated into a hash table\n"
	"\t Format: hist:keys=<field1[,field2,...]>\n"
	"\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
	"\t [:values=<field1[,field2,...]>]\n"
	"\t [:sort=<field1[,field2,...]>]\n"
	"\t [:size=#entries]\n"
	"\t [:pause][:continue][:clear]\n"
	"\t [:name=histname1]\n"
	"\t [:nohitcount]\n"
	"\t [:<handler>.<action>]\n"
	"\t [if <filter>]\n\n"
	"\t Note, special fields can be used as well:\n"
	"\t common_timestamp - to record current timestamp\n"
	"\t common_cpu - to record the CPU the event happened on\n"
	"\n"
	"\t A hist trigger variable can be:\n"
	"\t - a reference to a field e.g. x=current_timestamp,\n"
	"\t - a reference to another variable e.g. y=$x,\n"
	"\t - a numeric literal: e.g. ms_per_sec=1000,\n"
	"\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
	"\n"
	"\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
	"\t multiplication(*) and division(/) operators. An operand can be either a\n"
	"\t variable reference, field or numeric literal.\n"
	"\n"
	"\t When a matching event is hit, an entry is added to a hash\n"
	"\t table using the key(s) and value(s) named, and the value of a\n"
	"\t sum called 'hitcount' is incremented. Keys and values\n"
	"\t correspond to fields in the event's format description. Keys\n"
	"\t can be any field, or the special string 'common_stacktrace'.\n"
	"\t Compound keys consisting of up to two fields can be specified\n"
	"\t by the 'keys' keyword. Values must correspond to numeric\n"
	"\t fields. Sort keys consisting of up to two fields can be\n"
	"\t specified using the 'sort' keyword. The sort direction can\n"
	"\t be modified by appending '.descending' or '.ascending' to a\n"
	"\t sort field. The 'size' parameter can be used to specify more\n"
	"\t or fewer than the default 2048 entries for the hashtable size.\n"
	"\t If a hist trigger is given a name using the 'name' parameter,\n"
	"\t its histogram data will be shared with other triggers of the\n"
	"\t same name, and trigger hits will update this common data.\n\n"
	"\t Reading the 'hist' file for the event will dump the hash\n"
	"\t table in its entirety to stdout. If there are multiple hist\n"
	"\t triggers attached to an event, there will be a table for each\n"
	"\t trigger in the output. The table displayed for a named\n"
	"\t trigger will be the same as any other instance having the\n"
	"\t same name. The default format used to display a given field\n"
	"\t can be modified by appending any of the following modifiers\n"
	"\t to the field name, as applicable:\n\n"
	"\t .hex display a number as a hex value\n"
	"\t .sym display an address as a symbol\n"
	"\t .sym-offset display an address as a symbol and offset\n"
	"\t .execname display a common_pid as a program name\n"
	"\t .syscall display a syscall id as a syscall name\n"
	"\t .log2 display log2 value rather than raw number\n"
	"\t .buckets=size display values in groups of size rather than raw number\n"
	"\t .usecs display a common_timestamp in microseconds\n"
	"\t .percent display a number as a percentage value\n"
	"\t .graph display a bar-graph of a value\n\n"
	"\t The 'pause' parameter can be used to pause an existing hist\n"
	"\t trigger or to start a hist trigger but not log any events\n"
	"\t until told to do so. 'continue' can be used to start or\n"
	"\t restart a paused hist trigger.\n\n"
	"\t The 'clear' parameter will clear the contents of a running\n"
	"\t hist trigger and leave its current paused/active state\n"
	"\t unchanged.\n\n"
	"\t The 'nohitcount' (or NOHC) parameter will suppress display of\n"
	"\t raw hitcount in the histogram.\n\n"
	"\t The enable_hist and disable_hist triggers can be used to\n"
	"\t have one event conditionally start and stop another event's\n"
	"\t already-attached hist trigger. The syntax is analogous to\n"
	"\t the enable_event and disable_event triggers.\n\n"
	"\t Hist trigger handlers and actions are executed whenever a\n"
	"\t histogram entry is added or updated. They take the form:\n\n"
	"\t <handler>.<action>\n\n"
	"\t The available handlers are:\n\n"
	"\t onmatch(matching.event) - invoke on addition or update\n"
	"\t onmax(var) - invoke if var exceeds current max\n"
	"\t onchange(var) - invoke action if var changes\n\n"
	"\t The available actions are:\n\n"
	"\t trace(<synthetic_event>,param list) - generate synthetic event\n"
	"\t save(field,...) - save current event fields\n"
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t snapshot() - snapshot the trace buffer\n\n"
#endif
#ifdef CONFIG_SYNTH_EVENTS
	" events/synthetic_events\t- Create/append/remove/show synthetic events\n"
	"\t Write into this file to define/undefine new synthetic events.\n"
	"\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
#endif
#endif
;
5906
5907 static ssize_t
tracing_readme_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5908 tracing_readme_read(struct file *filp, char __user *ubuf,
5909 size_t cnt, loff_t *ppos)
5910 {
5911 return simple_read_from_buffer(ubuf, cnt, ppos,
5912 readme_msg, strlen(readme_msg));
5913 }
5914
5915 static const struct file_operations tracing_readme_fops = {
5916 .open = tracing_open_generic,
5917 .read = tracing_readme_read,
5918 .llseek = generic_file_llseek,
5919 };
5920
/*
 * seq_file ->next for the saved tgids listing: advance the position and
 * use the new value as the pid to look up with trace_find_tgid_ptr().
 */
static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
{
	int pid;

	(*pos)++;
	pid = *pos;

	return trace_find_tgid_ptr(pid);
}
5927
/*
 * seq_file ->start for the saved tgids listing: the position itself is
 * the pid passed to trace_find_tgid_ptr().
 */
static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
{
	int first_pid = *pos;

	return trace_find_tgid_ptr(first_pid);
}
5934
/* seq_file ->stop for the saved tgids listing: intentionally does nothing. */
static void saved_tgids_stop(struct seq_file *m, void *v)
{
}
5938
saved_tgids_show(struct seq_file * m,void * v)5939 static int saved_tgids_show(struct seq_file *m, void *v)
5940 {
5941 int *entry = (int *)v;
5942 int pid = entry - tgid_map;
5943 int tgid = *entry;
5944
5945 if (tgid == 0)
5946 return SEQ_SKIP;
5947
5948 seq_printf(m, "%d %d\n", pid, tgid);
5949 return 0;
5950 }
5951
5952 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5953 .start = saved_tgids_start,
5954 .stop = saved_tgids_stop,
5955 .next = saved_tgids_next,
5956 .show = saved_tgids_show,
5957 };
5958
tracing_saved_tgids_open(struct inode * inode,struct file * filp)5959 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5960 {
5961 int ret;
5962
5963 ret = tracing_check_open_get_tr(NULL);
5964 if (ret)
5965 return ret;
5966
5967 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5968 }
5969
5970
5971 static const struct file_operations tracing_saved_tgids_fops = {
5972 .open = tracing_saved_tgids_open,
5973 .read = seq_read,
5974 .llseek = seq_lseek,
5975 .release = seq_release,
5976 };
5977
/*
 * seq_file ->next for the saved cmdlines listing.
 *
 * @v points into savedcmd->map_cmdline_to_pid[].  Unless this is the very
 * first step (position 0 with nothing emitted yet), move past the current
 * slot; then return the first slot that holds a real pid, i.e. neither -1
 * nor NO_CMDLINE_MAP.  Returns NULL once the array is exhausted.
 */
static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
{
	unsigned int *slot = v;
	unsigned int *end;

	if (*pos || m->count)
		slot++;

	(*pos)++;

	end = &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
	while (slot < end) {
		if (*slot != -1 && *slot != NO_CMDLINE_MAP)
			return slot;
		slot++;
	}

	return NULL;
}
5997
/*
 * seq_file ->start for the saved cmdlines listing.
 *
 * Disables preemption and takes trace_cmdline_lock (both undone in
 * saved_cmdlines_stop()), then steps from the first slot with
 * saved_cmdlines_next() until the local position passes *pos.  Returns
 * the slot reached, or NULL if the entries run out first.
 */
static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
{
	loff_t idx = 0;
	void *v;

	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	for (v = &savedcmd->map_cmdline_to_pid[0]; idx <= *pos; ) {
		v = saved_cmdlines_next(m, v, &idx);
		if (!v)
			break;
	}

	return v;
}
6015
saved_cmdlines_stop(struct seq_file * m,void * v)6016 static void saved_cmdlines_stop(struct seq_file *m, void *v)
6017 {
6018 arch_spin_unlock(&trace_cmdline_lock);
6019 preempt_enable();
6020 }
6021
saved_cmdlines_show(struct seq_file * m,void * v)6022 static int saved_cmdlines_show(struct seq_file *m, void *v)
6023 {
6024 char buf[TASK_COMM_LEN];
6025 unsigned int *pid = v;
6026
6027 __trace_find_cmdline(*pid, buf);
6028 seq_printf(m, "%d %s\n", *pid, buf);
6029 return 0;
6030 }
6031
/*
 * seq_file iterator callbacks installed by tracing_saved_cmdlines_open().
 * ->start takes trace_cmdline_lock and ->stop releases it.
 */
static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
	.start		= saved_cmdlines_start,
	.next		= saved_cmdlines_next,
	.stop		= saved_cmdlines_stop,
	.show		= saved_cmdlines_show,
};
6038
tracing_saved_cmdlines_open(struct inode * inode,struct file * filp)6039 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6040 {
6041 int ret;
6042
6043 ret = tracing_check_open_get_tr(NULL);
6044 if (ret)
6045 return ret;
6046
6047 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6048 }
6049
6050 static const struct file_operations tracing_saved_cmdlines_fops = {
6051 .open = tracing_saved_cmdlines_open,
6052 .read = seq_read,
6053 .llseek = seq_lseek,
6054 .release = seq_release,
6055 };
6056
6057 static ssize_t
tracing_saved_cmdlines_size_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6058 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6059 size_t cnt, loff_t *ppos)
6060 {
6061 char buf[64];
6062 int r;
6063
6064 preempt_disable();
6065 arch_spin_lock(&trace_cmdline_lock);
6066 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6067 arch_spin_unlock(&trace_cmdline_lock);
6068 preempt_enable();
6069
6070 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6071 }
6072
tracing_resize_saved_cmdlines(unsigned int val)6073 static int tracing_resize_saved_cmdlines(unsigned int val)
6074 {
6075 struct saved_cmdlines_buffer *s, *savedcmd_temp;
6076
6077 s = allocate_cmdlines_buffer(val);
6078 if (!s)
6079 return -ENOMEM;
6080
6081 preempt_disable();
6082 arch_spin_lock(&trace_cmdline_lock);
6083 savedcmd_temp = savedcmd;
6084 savedcmd = s;
6085 arch_spin_unlock(&trace_cmdline_lock);
6086 preempt_enable();
6087 free_saved_cmdlines_buffer(savedcmd_temp);
6088
6089 return 0;
6090 }
6091
6092 static ssize_t
tracing_saved_cmdlines_size_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6093 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6094 size_t cnt, loff_t *ppos)
6095 {
6096 unsigned long val;
6097 int ret;
6098
6099 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6100 if (ret)
6101 return ret;
6102
6103 /* must have at least 1 entry or less than PID_MAX_DEFAULT */
6104 if (!val || val > PID_MAX_DEFAULT)
6105 return -EINVAL;
6106
6107 ret = tracing_resize_saved_cmdlines((unsigned int)val);
6108 if (ret < 0)
6109 return ret;
6110
6111 *ppos += cnt;
6112
6113 return cnt;
6114 }
6115
/*
 * File operations for reading and changing the number of saved-cmdlines
 * entries; no seq_file is involved, reads and writes are handled directly.
 */
static const struct file_operations tracing_saved_cmdlines_size_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_saved_cmdlines_size_read,
	.write		= tracing_saved_cmdlines_size_write,
};
6121
6122 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6123 static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item * ptr)6124 update_eval_map(union trace_eval_map_item *ptr)
6125 {
6126 if (!ptr->map.eval_string) {
6127 if (ptr->tail.next) {
6128 ptr = ptr->tail.next;
6129 /* Set ptr to the next real item (skip head) */
6130 ptr++;
6131 } else
6132 return NULL;
6133 }
6134 return ptr;
6135 }
6136
/*
 * seq_file ->next: bump *pos and return the item following @v, crossing
 * into the next array when the current one ends. Returns NULL at the end
 * of the list.
 */
static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
{
	union trace_eval_map_item *item;

	(*pos)++;

	/*
	 * Paranoid! If ptr points to end, we don't want to increment past it.
	 * This really should never happen.
	 */
	item = update_eval_map(v);
	if (WARN_ON_ONCE(!item))
		return NULL;

	return update_eval_map(item + 1);
}
6155
/*
 * seq_file ->start: take trace_eval_mutex and walk from the first map item
 * to position *pos. Returns that item, or NULL if the list is empty or
 * shorter than *pos. The mutex stays held in every case; eval_map_stop()
 * drops it.
 */
static void *eval_map_start(struct seq_file *m, loff_t *pos)
{
	union trace_eval_map_item *item;
	loff_t idx = 0;

	mutex_lock(&trace_eval_mutex);

	item = trace_eval_maps;
	if (!item)
		return NULL;

	/* The first element of an array is its head; start after it. */
	for (item++; item && idx < *pos; )
		item = eval_map_next(m, item, &idx);

	return item;
}
6173
/* seq_file ->stop: release trace_eval_mutex taken by eval_map_start(). */
static void eval_map_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&trace_eval_mutex);
}
6178
eval_map_show(struct seq_file * m,void * v)6179 static int eval_map_show(struct seq_file *m, void *v)
6180 {
6181 union trace_eval_map_item *ptr = v;
6182
6183 seq_printf(m, "%s %ld (%s)\n",
6184 ptr->map.eval_string, ptr->map.eval_value,
6185 ptr->map.system);
6186
6187 return 0;
6188 }
6189
/*
 * seq_file iterator callbacks installed by tracing_eval_map_open().
 * ->start takes trace_eval_mutex and ->stop releases it.
 */
static const struct seq_operations tracing_eval_map_seq_ops = {
	.start		= eval_map_start,
	.next		= eval_map_next,
	.stop		= eval_map_stop,
	.show		= eval_map_show,
};
6196
tracing_eval_map_open(struct inode * inode,struct file * filp)6197 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6198 {
6199 int ret;
6200
6201 ret = tracing_check_open_get_tr(NULL);
6202 if (ret)
6203 return ret;
6204
6205 return seq_open(filp, &tracing_eval_map_seq_ops);
6206 }
6207
6208 static const struct file_operations tracing_eval_map_fops = {
6209 .open = tracing_eval_map_open,
6210 .read = seq_read,
6211 .llseek = seq_lseek,
6212 .release = seq_release,
6213 };
6214
6215 static inline union trace_eval_map_item *
trace_eval_jmp_to_tail(union trace_eval_map_item * ptr)6216 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6217 {
6218 /* Return tail of array given the head */
6219 return ptr + ptr->head.length + 1;
6220 }
6221
6222 static void
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)6223 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6224 int len)
6225 {
6226 struct trace_eval_map **stop;
6227 struct trace_eval_map **map;
6228 union trace_eval_map_item *map_array;
6229 union trace_eval_map_item *ptr;
6230
6231 stop = start + len;
6232
6233 /*
6234 * The trace_eval_maps contains the map plus a head and tail item,
6235 * where the head holds the module and length of array, and the
6236 * tail holds a pointer to the next list.
6237 */
6238 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6239 if (!map_array) {
6240 pr_warn("Unable to allocate trace eval mapping\n");
6241 return;
6242 }
6243
6244 mutex_lock(&trace_eval_mutex);
6245
6246 if (!trace_eval_maps)
6247 trace_eval_maps = map_array;
6248 else {
6249 ptr = trace_eval_maps;
6250 for (;;) {
6251 ptr = trace_eval_jmp_to_tail(ptr);
6252 if (!ptr->tail.next)
6253 break;
6254 ptr = ptr->tail.next;
6255
6256 }
6257 ptr->tail.next = map_array;
6258 }
6259 map_array->head.mod = mod;
6260 map_array->head.length = len;
6261 map_array++;
6262
6263 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6264 map_array->map = **map;
6265 map_array++;
6266 }
6267 memset(map_array, 0, sizeof(*map_array));
6268
6269 mutex_unlock(&trace_eval_mutex);
6270 }
6271
/*
 * Create the read-only "eval_map" file under @d_tracer, served by
 * tracing_eval_map_fops. No private data is attached to the file.
 */
static void trace_create_eval_file(struct dentry *d_tracer)
{
	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
			  NULL, &tracing_eval_map_fops);
}
6277
6278 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
/* No-op stubs used when CONFIG_TRACE_EVAL_MAP_FILE is not set. */
static inline void trace_create_eval_file(struct dentry *d_tracer) { }
static inline void trace_insert_eval_map_file(struct module *mod,
			      struct trace_eval_map **start, int len) { }
6282 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6283
/*
 * Register @len eval maps starting at @start: update the trace events
 * with them and then record them for the eval_map file (a no-op when
 * CONFIG_TRACE_EVAL_MAP_FILE is not set). Does nothing for @len <= 0.
 */
static void trace_insert_eval_map(struct module *mod,
				  struct trace_eval_map **start, int len)
{
	if (len > 0) {
		trace_event_eval_update(start, len);
		trace_insert_eval_map_file(mod, start, len);
	}
}
6298
6299 static ssize_t
tracing_set_trace_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6300 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6301 size_t cnt, loff_t *ppos)
6302 {
6303 struct trace_array *tr = filp->private_data;
6304 char buf[MAX_TRACER_SIZE+2];
6305 int r;
6306
6307 mutex_lock(&trace_types_lock);
6308 r = sprintf(buf, "%s\n", tr->current_trace->name);
6309 mutex_unlock(&trace_types_lock);
6310
6311 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6312 }
6313
tracer_init(struct tracer * t,struct trace_array * tr)6314 int tracer_init(struct tracer *t, struct trace_array *tr)
6315 {
6316 tracing_reset_online_cpus(&tr->array_buffer);
6317 return t->init(tr);
6318 }
6319
set_buffer_entries(struct array_buffer * buf,unsigned long val)6320 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6321 {
6322 int cpu;
6323
6324 for_each_tracing_cpu(cpu)
6325 per_cpu_ptr(buf->data, cpu)->entries = val;
6326 }
6327
update_buffer_entries(struct array_buffer * buf,int cpu)6328 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6329 {
6330 if (cpu == RING_BUFFER_ALL_CPUS) {
6331 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6332 } else {
6333 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6334 }
6335 }
6336
6337 #ifdef CONFIG_TRACER_MAX_TRACE
6338 /* resize @tr's buffer to the size of @size_tr's entries */
resize_buffer_duplicate_size(struct array_buffer * trace_buf,struct array_buffer * size_buf,int cpu_id)6339 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6340 struct array_buffer *size_buf, int cpu_id)
6341 {
6342 int cpu, ret = 0;
6343
6344 if (cpu_id == RING_BUFFER_ALL_CPUS) {
6345 for_each_tracing_cpu(cpu) {
6346 ret = ring_buffer_resize(trace_buf->buffer,
6347 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6348 if (ret < 0)
6349 break;
6350 per_cpu_ptr(trace_buf->data, cpu)->entries =
6351 per_cpu_ptr(size_buf->data, cpu)->entries;
6352 }
6353 } else {
6354 ret = ring_buffer_resize(trace_buf->buffer,
6355 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6356 if (ret == 0)
6357 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6358 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6359 }
6360
6361 return ret;
6362 }
6363 #endif /* CONFIG_TRACER_MAX_TRACE */
6364
/*
 * Resize @tr's main ring buffer to @size for @cpu (or for all CPUs when
 * @cpu is RING_BUFFER_ALL_CPUS) and, with CONFIG_TRACER_MAX_TRACE and an
 * allocated snapshot, resize the max buffer to match.
 *
 * Tracing on @tr is stopped for the duration of the resize and restarted
 * before returning. The callers in this file hold trace_types_lock.
 *
 * Returns 0 on success (including when the buffer does not exist yet) or
 * the negative value from ring_buffer_resize().
 */
static int __tracing_resize_ring_buffer(struct trace_array *tr,
					unsigned long size, int cpu)
{
	int ret;

	/*
	 * If kernel or user changes the size of the ring buffer
	 * we use the size that was given, and we can forget about
	 * expanding it later.
	 */
	ring_buffer_expanded = true;

	/* May be called before buffers are initialized */
	if (!tr->array_buffer.buffer)
		return 0;

	/* Do not allow tracing while resizing ring buffer */
	tracing_stop_tr(tr);

	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
	if (ret < 0)
		goto out_start;

#ifdef CONFIG_TRACER_MAX_TRACE
	/* No snapshot buffer allocated: only the main buffer needs updating. */
	if (!tr->allocated_snapshot)
		goto out;

	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
	if (ret < 0) {
		/*
		 * The max buffer could not follow. Resize the main buffer
		 * back to the entry counts still cached in
		 * tr->array_buffer, which have not been updated yet.
		 */
		int r = resize_buffer_duplicate_size(&tr->array_buffer,
						     &tr->array_buffer, cpu);
		if (r < 0) {
			/*
			 * AARGH! We are left with different
			 * size max buffer!!!!
			 * The max buffer is our "snapshot" buffer.
			 * When a tracer needs a snapshot (one of the
			 * latency tracers), it swaps the max buffer
			 * with the saved snap shot. We succeeded to
			 * update the size of the main buffer, but failed to
			 * update the size of the max buffer. But when we tried
			 * to reset the main buffer to the original size, we
			 * failed there too. This is very unlikely to
			 * happen, but if it does, warn and kill all
			 * tracing.
			 */
			WARN_ON(1);
			tracing_disabled = 1;
		}
		/* Skip the cached-size updates; ret holds the resize error. */
		goto out_start;
	}

	update_buffer_entries(&tr->max_buffer, cpu);

 out:
#endif /* CONFIG_TRACER_MAX_TRACE */

	update_buffer_entries(&tr->array_buffer, cpu);
 out_start:
	tracing_start_tr(tr);
	return ret;
}
6427
tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu_id)6428 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6429 unsigned long size, int cpu_id)
6430 {
6431 int ret;
6432
6433 mutex_lock(&trace_types_lock);
6434
6435 if (cpu_id != RING_BUFFER_ALL_CPUS) {
6436 /* make sure, this cpu is enabled in the mask */
6437 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6438 ret = -EINVAL;
6439 goto out;
6440 }
6441 }
6442
6443 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6444 if (ret < 0)
6445 ret = -ENOMEM;
6446
6447 out:
6448 mutex_unlock(&trace_types_lock);
6449
6450 return ret;
6451 }
6452
6453
6454 /**
6455 * tracing_update_buffers - used by tracing facility to expand ring buffers
6456 *
6457 * To save on memory when the tracing is never used on a system with it
6458 * configured in. The ring buffers are set to a minimum size. But once
6459 * a user starts to use the tracing facility, then they need to grow
6460 * to their default size.
6461 *
6462 * This function is to be called when a tracer is about to be used.
6463 */
tracing_update_buffers(void)6464 int tracing_update_buffers(void)
6465 {
6466 int ret = 0;
6467
6468 mutex_lock(&trace_types_lock);
6469 if (!ring_buffer_expanded)
6470 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6471 RING_BUFFER_ALL_CPUS);
6472 mutex_unlock(&trace_types_lock);
6473
6474 return ret;
6475 }
6476
/* Forward declarations; add_tracer_options() below needs the prototype. */
struct trace_option_dentry;

static void
create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6481
6482 /*
6483 * Used to clear out the tracer before deletion of an instance.
6484 * Must have trace_types_lock held.
6485 */
tracing_set_nop(struct trace_array * tr)6486 static void tracing_set_nop(struct trace_array *tr)
6487 {
6488 if (tr->current_trace == &nop_trace)
6489 return;
6490
6491 tr->current_trace->enabled--;
6492
6493 if (tr->current_trace->reset)
6494 tr->current_trace->reset(tr);
6495
6496 tr->current_trace = &nop_trace;
6497 }
6498
6499 static bool tracer_options_updated;
6500
add_tracer_options(struct trace_array * tr,struct tracer * t)6501 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6502 {
6503 /* Only enable if the directory has been created already. */
6504 if (!tr->dir)
6505 return;
6506
6507 /* Only create trace option files after update_tracer_options finish */
6508 if (!tracer_options_updated)
6509 return;
6510
6511 create_trace_option_files(tr, t);
6512 }
6513
/*
 * tracing_set_tracer - make the tracer named @buf the current tracer of @tr
 *
 * Runs entirely under trace_types_lock. The ring buffer is expanded first
 * if that has not happened yet, @buf is looked up in the trace_types list,
 * the current tracer is torn down and the new one is initialized.
 *
 * Returns 0 on success, when the requested tracer is already current, or
 * when a "noboot" tracer is ignored before SYSTEM_RUNNING. Returns -EINVAL
 * when no tracer is named @buf or trace_ok_for_array() rejects it, and
 * -EBUSY when tr->trace_ref is non-zero or (CONFIG_TRACER_SNAPSHOT) the
 * new tracer uses the max buffer while a conditional snapshot is set.
 * Otherwise returns the error from the resize, the snapshot allocation or
 * tracer_init().
 *
 * Once the old tracer has been reset, a later failure leaves
 * tr->current_trace pointing at nop_trace.
 */
int tracing_set_tracer(struct trace_array *tr, const char *buf)
{
	struct tracer *t;
#ifdef CONFIG_TRACER_MAX_TRACE
	bool had_max_tr;
#endif
	int ret = 0;

	mutex_lock(&trace_types_lock);

	if (!ring_buffer_expanded) {
		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
						RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			goto out;
		ret = 0;
	}

	/* Find the registered tracer whose name matches @buf exactly. */
	for (t = trace_types; t; t = t->next) {
		if (strcmp(t->name, buf) == 0)
			break;
	}
	if (!t) {
		ret = -EINVAL;
		goto out;
	}
	/* Already the current tracer: nothing to do, ret is still 0. */
	if (t == tr->current_trace)
		goto out;

#ifdef CONFIG_TRACER_SNAPSHOT
	if (t->use_max_tr) {
		/* tr->cond_snapshot is checked under max_lock with IRQs off. */
		local_irq_disable();
		arch_spin_lock(&tr->max_lock);
		if (tr->cond_snapshot)
			ret = -EBUSY;
		arch_spin_unlock(&tr->max_lock);
		local_irq_enable();
		if (ret)
			goto out;
	}
#endif
	/* Some tracers won't work on kernel command line */
	if (system_state < SYSTEM_RUNNING && t->noboot) {
		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
			t->name);
		goto out;
	}

	/* Some tracers are only allowed for the top level buffer */
	if (!trace_ok_for_array(t, tr)) {
		ret = -EINVAL;
		goto out;
	}

	/* If trace pipe files are being read, we can't change the tracer */
	if (tr->trace_ref) {
		ret = -EBUSY;
		goto out;
	}

	trace_branch_disable();

	/* Tear down the outgoing tracer. */
	tr->current_trace->enabled--;

	if (tr->current_trace->reset)
		tr->current_trace->reset(tr);

#ifdef CONFIG_TRACER_MAX_TRACE
	had_max_tr = tr->current_trace->use_max_tr;

	/* Current trace needs to be nop_trace before synchronize_rcu */
	tr->current_trace = &nop_trace;

	if (had_max_tr && !t->use_max_tr) {
		/*
		 * We need to make sure that the update_max_tr sees that
		 * current_trace changed to nop_trace to keep it from
		 * swapping the buffers after we resize it.
		 * The update_max_tr is called from interrupts disabled
		 * so the synchronize_rcu() below is sufficient.
		 */
		synchronize_rcu();
		free_snapshot(tr);
	}

	if (t->use_max_tr && !tr->allocated_snapshot) {
		ret = tracing_alloc_snapshot_instance(tr);
		if (ret < 0)
			goto out;
	}
#else
	tr->current_trace = &nop_trace;
#endif

	if (t->init) {
		ret = tracer_init(t, tr);
		if (ret)
			goto out;
	}

	/* Install the new tracer only after it initialized successfully. */
	tr->current_trace = t;
	tr->current_trace->enabled++;
	trace_branch_enable(tr);
 out:
	mutex_unlock(&trace_types_lock);

	return ret;
}
6622
6623 static ssize_t
tracing_set_trace_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6624 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6625 size_t cnt, loff_t *ppos)
6626 {
6627 struct trace_array *tr = filp->private_data;
6628 char buf[MAX_TRACER_SIZE+1];
6629 char *name;
6630 size_t ret;
6631 int err;
6632
6633 ret = cnt;
6634
6635 if (cnt > MAX_TRACER_SIZE)
6636 cnt = MAX_TRACER_SIZE;
6637
6638 if (copy_from_user(buf, ubuf, cnt))
6639 return -EFAULT;
6640
6641 buf[cnt] = 0;
6642
6643 name = strim(buf);
6644
6645 err = tracing_set_tracer(tr, name);
6646 if (err)
6647 return err;
6648
6649 *ppos += ret;
6650
6651 return ret;
6652 }
6653
6654 static ssize_t
tracing_nsecs_read(unsigned long * ptr,char __user * ubuf,size_t cnt,loff_t * ppos)6655 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6656 size_t cnt, loff_t *ppos)
6657 {
6658 char buf[64];
6659 int r;
6660
6661 r = snprintf(buf, sizeof(buf), "%ld\n",
6662 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6663 if (r > sizeof(buf))
6664 r = sizeof(buf);
6665 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6666 }
6667
6668 static ssize_t
tracing_nsecs_write(unsigned long * ptr,const char __user * ubuf,size_t cnt,loff_t * ppos)6669 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6670 size_t cnt, loff_t *ppos)
6671 {
6672 unsigned long val;
6673 int ret;
6674
6675 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6676 if (ret)
6677 return ret;
6678
6679 *ptr = val * 1000;
6680
6681 return cnt;
6682 }
6683
/* Read handler: report the global tracing_thresh via tracing_nsecs_read(). */
static ssize_t
tracing_thresh_read(struct file *filp, char __user *ubuf,
		    size_t cnt, loff_t *ppos)
{
	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
}
6690
6691 static ssize_t
tracing_thresh_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6692 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6693 size_t cnt, loff_t *ppos)
6694 {
6695 struct trace_array *tr = filp->private_data;
6696 int ret;
6697
6698 mutex_lock(&trace_types_lock);
6699 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6700 if (ret < 0)
6701 goto out;
6702
6703 if (tr->current_trace->update_thresh) {
6704 ret = tr->current_trace->update_thresh(tr);
6705 if (ret < 0)
6706 goto out;
6707 }
6708
6709 ret = cnt;
6710 out:
6711 mutex_unlock(&trace_types_lock);
6712
6713 return ret;
6714 }
6715
6716 #ifdef CONFIG_TRACER_MAX_TRACE
6717
6718 static ssize_t
tracing_max_lat_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6719 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6720 size_t cnt, loff_t *ppos)
6721 {
6722 struct trace_array *tr = filp->private_data;
6723
6724 return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6725 }
6726
6727 static ssize_t
tracing_max_lat_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6728 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6729 size_t cnt, loff_t *ppos)
6730 {
6731 struct trace_array *tr = filp->private_data;
6732
6733 return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6734 }
6735
6736 #endif
6737
open_pipe_on_cpu(struct trace_array * tr,int cpu)6738 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6739 {
6740 if (cpu == RING_BUFFER_ALL_CPUS) {
6741 if (cpumask_empty(tr->pipe_cpumask)) {
6742 cpumask_setall(tr->pipe_cpumask);
6743 return 0;
6744 }
6745 } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6746 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6747 return 0;
6748 }
6749 return -EBUSY;
6750 }
6751
close_pipe_on_cpu(struct trace_array * tr,int cpu)6752 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6753 {
6754 if (cpu == RING_BUFFER_ALL_CPUS) {
6755 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6756 cpumask_clear(tr->pipe_cpumask);
6757 } else {
6758 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6759 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6760 }
6761 }
6762
/*
 * Open handler for a consuming pipe reader.
 *
 * Checks the trace array via tracing_check_open_get_tr(), claims the
 * per-CPU (or all-CPU) reader slot, allocates and fills in a
 * trace_iterator, stores it in filp->private_data and bumps
 * tr->trace_ref. Everything after the initial check runs under
 * trace_types_lock.
 *
 * Returns 0 on success, -EBUSY if the reader slot is already claimed,
 * -ENOMEM on allocation failure, or the error from the initial check.
 * The failure labels unwind in reverse order of setup.
 */
static int tracing_open_pipe(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int cpu;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	mutex_lock(&trace_types_lock);
	cpu = tracing_get_cpu(inode);
	ret = open_pipe_on_cpu(tr, cpu);
	if (ret)
		goto fail_pipe_on_cpu;

	/* create a buffer to store the information to pass to userspace */
	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter) {
		ret = -ENOMEM;
		goto fail_alloc_iter;
	}

	trace_seq_init(&iter->seq);
	iter->trace = tr->current_trace;

	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
		ret = -ENOMEM;
		goto fail;
	}

	/* trace pipe does not show start of buffer */
	cpumask_setall(iter->started);

	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
		iter->iter_flags |= TRACE_FILE_LAT_FMT;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	iter->tr = tr;
	iter->array_buffer = &tr->array_buffer;
	iter->cpu_file = cpu;
	mutex_init(&iter->mutex);
	filp->private_data = iter;

	if (iter->trace->pipe_open)
		iter->trace->pipe_open(iter);

	nonseekable_open(inode, filp);

	/* A non-zero trace_ref makes tracing_set_tracer() return -EBUSY. */
	tr->trace_ref++;

	mutex_unlock(&trace_types_lock);
	return ret;

fail:
	kfree(iter);
fail_alloc_iter:
	close_pipe_on_cpu(tr, cpu);
fail_pipe_on_cpu:
	__trace_array_put(tr);
	mutex_unlock(&trace_types_lock);
	return ret;
}
6830
/*
 * Release handler paired with tracing_open_pipe().
 *
 * Under trace_types_lock: drops tr->trace_ref, calls the tracer's
 * ->pipe_close() if it has one and releases the reader slot. The iterator
 * is then freed and the trace array reference is put. Always returns 0.
 */
static int tracing_release_pipe(struct inode *inode, struct file *file)
{
	struct trace_iterator *iter = file->private_data;
	struct trace_array *tr = inode->i_private;

	mutex_lock(&trace_types_lock);

	tr->trace_ref--;

	if (iter->trace->pipe_close)
		iter->trace->pipe_close(iter);
	close_pipe_on_cpu(tr, iter->cpu_file);
	mutex_unlock(&trace_types_lock);

	/* The iterator is freed outside trace_types_lock. */
	free_trace_iter_content(iter);
	kfree(iter);

	trace_array_put(tr);

	return 0;
}
6852
6853 static __poll_t
trace_poll(struct trace_iterator * iter,struct file * filp,poll_table * poll_table)6854 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6855 {
6856 struct trace_array *tr = iter->tr;
6857
6858 /* Iterators are static, they should be filled or empty */
6859 if (trace_buffer_iter(iter, iter->cpu_file))
6860 return EPOLLIN | EPOLLRDNORM;
6861
6862 if (tr->trace_flags & TRACE_ITER_BLOCK)
6863 /*
6864 * Always select as readable when in blocking mode
6865 */
6866 return EPOLLIN | EPOLLRDNORM;
6867 else
6868 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6869 filp, poll_table, iter->tr->buffer_percent);
6870 }
6871
6872 static __poll_t
tracing_poll_pipe(struct file * filp,poll_table * poll_table)6873 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6874 {
6875 struct trace_iterator *iter = filp->private_data;
6876
6877 return trace_poll(iter, filp, poll_table);
6878 }
6879
6880 /* Must be called with iter->mutex held. */
tracing_wait_pipe(struct file * filp)6881 static int tracing_wait_pipe(struct file *filp)
6882 {
6883 struct trace_iterator *iter = filp->private_data;
6884 int ret;
6885
6886 while (trace_empty(iter)) {
6887
6888 if ((filp->f_flags & O_NONBLOCK)) {
6889 return -EAGAIN;
6890 }
6891
6892 /*
6893 * We block until we read something and tracing is disabled.
6894 * We still block if tracing is disabled, but we have never
6895 * read anything. This allows a user to cat this file, and
6896 * then enable tracing. But after we have read something,
6897 * we give an EOF when tracing is again disabled.
6898 *
6899 * iter->pos will be 0 if we haven't read anything.
6900 */
6901 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6902 break;
6903
6904 mutex_unlock(&iter->mutex);
6905
6906 ret = wait_on_pipe(iter, 0);
6907
6908 mutex_lock(&iter->mutex);
6909
6910 if (ret)
6911 return ret;
6912 }
6913
6914 return 1;
6915 }
6916
/*
 * Consumer reader.
 *
 * Formats trace entries into iter->seq, consuming them from the ring
 * buffer, and copies the text to @ubuf. Text left in iter->seq by an
 * earlier call is returned first. If the tracer supplies its own ->read()
 * and that returns non-zero, its result is returned as-is.
 *
 * Returns the value from trace_seq_to_user(), 0 when there is nothing
 * more to read, or the error from tracing_wait_pipe().
 */
static ssize_t
tracing_read_pipe(struct file *filp, char __user *ubuf,
		  size_t cnt, loff_t *ppos)
{
	struct trace_iterator *iter = filp->private_data;
	ssize_t sret;

	/*
	 * Avoid more than one consumer on a single file descriptor
	 * This is just a matter of traces coherency, the ring buffer itself
	 * is protected.
	 */
	mutex_lock(&iter->mutex);

	/* return any leftover data */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (sret != -EBUSY)
		goto out;

	trace_seq_init(&iter->seq);

	if (iter->trace->read) {
		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
		if (sret)
			goto out;
	}

waitagain:
	sret = tracing_wait_pipe(filp);
	if (sret <= 0)
		goto out;

	/* stop when tracing is finished */
	if (trace_empty(iter)) {
		sret = 0;
		goto out;
	}

	/* Cap the request just below one page. */
	if (cnt >= PAGE_SIZE)
		cnt = PAGE_SIZE - 1;

	/* reset all but tr, trace, and overruns */
	trace_iterator_reset(iter);
	cpumask_clear(iter->started);
	trace_seq_init(&iter->seq);

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);
	while (trace_find_next_entry_inc(iter) != NULL) {
		enum print_line_t ret;
		int save_len = iter->seq.seq.len;

		ret = print_trace_line(iter);
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			/*
			 * If one print_trace_line() fills the entire trace_seq in
			 * one shot, trace_seq_to_user() will return -EBUSY because
			 * save_len == 0. In this case, we need to consume it;
			 * otherwise the loop will peek this event next time,
			 * resulting in an infinite loop.
			 */
			if (save_len == 0) {
				iter->seq.full = 0;
				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
				trace_consume(iter);
				break;
			}

			/* In other cases, don't print partial lines */
			iter->seq.seq.len = save_len;
			break;
		}
		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);

		/* Enough output gathered to satisfy the request. */
		if (trace_seq_used(&iter->seq) >= cnt)
			break;

		/*
		 * Setting the full flag means we reached the trace_seq buffer
		 * size and we should leave by partial output condition above.
		 * One of the trace_seq_* functions is not used properly.
		 */
		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
			  iter->ent->type);
	}
	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();

	/* Now copy what we have to the user */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	/* Everything was handed out: start the next call with an empty seq. */
	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
		trace_seq_init(&iter->seq);

	/*
	 * If there was nothing to send to user, in spite of consuming trace
	 * entries, go back to wait for more entries.
	 */
	if (sret == -EBUSY)
		goto waitagain;

out:
	mutex_unlock(&iter->mutex);

	return sret;
}
7025
/*
 * ->spd_release callback for tracing_splice_read_pipe(): free the page at
 * index @idx of @spd.
 */
static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
				     unsigned int idx)
{
	__free_page(spd->pages[idx]);
}
7031
/*
 * Format as many whole trace lines as fit into iter->seq, consuming the
 * entries that were printed.
 *
 * @rem is the number of bytes the caller still wants. The return value is
 * what remains of that budget: 0 when the budget is used up or no further
 * entry exists, non-zero when filling stopped because the seq buffer
 * overflowed or a partial line was reported. A line that does not fit is
 * rolled back by restoring seq.len and its entry is not consumed.
 */
static size_t
tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
{
	size_t count;
	int save_len;
	int ret;

	/* Seq buffer is page-sized, exactly what we need. */
	for (;;) {
		save_len = iter->seq.seq.len;
		ret = print_trace_line(iter);

		if (trace_seq_has_overflowed(&iter->seq)) {
			iter->seq.seq.len = save_len;
			break;
		}

		/*
		 * This should not be hit, because it should only
		 * be set if the iter->seq overflowed. But check it
		 * anyway to be safe.
		 */
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			iter->seq.seq.len = save_len;
			break;
		}

		/* Bytes added by this line; drop it if over the budget. */
		count = trace_seq_used(&iter->seq) - save_len;
		if (rem < count) {
			rem = 0;
			iter->seq.seq.len = save_len;
			break;
		}

		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);
		rem -= count;
		if (!trace_find_next_entry_inc(iter))	{
			/* No more entries: end the fill and clear iter->ent. */
			rem = 0;
			iter->ent = NULL;
			break;
		}
	}

	return rem;
}
7078
/*
 * splice_read handler for the consuming pipe reader.
 *
 * Formats trace output one page at a time into newly allocated pages and
 * hands them to @pipe. If the tracer supplies its own ->splice_read() and
 * that returns non-zero, its result is returned instead.
 *
 * Returns the splice_to_pipe() result, 0 if no page was filled, -ENOMEM
 * if the splice descriptor cannot be grown, -EFAULT if no entry can be
 * found after waiting, or the error from tracing_wait_pipe().
 */
static ssize_t tracing_splice_read_pipe(struct file *filp,
					loff_t *ppos,
					struct pipe_inode_info *pipe,
					size_t len,
					unsigned int flags)
{
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct trace_iterator *iter = filp->private_data;
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages	= 0, /* This gets updated below. */
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &default_pipe_buf_ops,
		.spd_release	= tracing_spd_release_pipe,
	};
	ssize_t ret;
	size_t rem;
	unsigned int i;

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	mutex_lock(&iter->mutex);

	if (iter->trace->splice_read) {
		ret = iter->trace->splice_read(iter, filp,
					       ppos, pipe, len, flags);
		if (ret)
			goto out_err;
	}

	ret = tracing_wait_pipe(filp);
	if (ret <= 0)
		goto out_err;

	/* Make sure iter->ent is set before the fill loop starts. */
	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
		ret = -EFAULT;
		goto out_err;
	}

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);

	/* Fill as many pages as possible. */
	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
		spd.pages[i] = alloc_page(GFP_KERNEL);
		if (!spd.pages[i])
			break;

		rem = tracing_fill_pipe_page(rem, iter);

		/* Copy the data into the page, so we can start over. */
		ret = trace_seq_to_buffer(&iter->seq,
					  page_address(spd.pages[i]),
					  trace_seq_used(&iter->seq));
		if (ret < 0) {
			/* This page is not counted in i, so free it here. */
			__free_page(spd.pages[i]);
			break;
		}
		spd.partial[i].offset = 0;
		spd.partial[i].len = trace_seq_used(&iter->seq);

		trace_seq_init(&iter->seq);
	}

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
	mutex_unlock(&iter->mutex);

	spd.nr_pages = i;

	if (i)
		ret = splice_to_pipe(pipe, &spd);
	else
		ret = 0;
out:
	splice_shrink_spd(&spd);
	return ret;

out_err:
	/* Error before the fill loop: drop iter->mutex, then shared cleanup. */
	mutex_unlock(&iter->mutex);
	goto out;
}
7164
7165 static ssize_t
tracing_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7166 tracing_entries_read(struct file *filp, char __user *ubuf,
7167 size_t cnt, loff_t *ppos)
7168 {
7169 struct inode *inode = file_inode(filp);
7170 struct trace_array *tr = inode->i_private;
7171 int cpu = tracing_get_cpu(inode);
7172 char buf[64];
7173 int r = 0;
7174 ssize_t ret;
7175
7176 mutex_lock(&trace_types_lock);
7177
7178 if (cpu == RING_BUFFER_ALL_CPUS) {
7179 int cpu, buf_size_same;
7180 unsigned long size;
7181
7182 size = 0;
7183 buf_size_same = 1;
7184 /* check if all cpu sizes are same */
7185 for_each_tracing_cpu(cpu) {
7186 /* fill in the size from first enabled cpu */
7187 if (size == 0)
7188 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7189 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7190 buf_size_same = 0;
7191 break;
7192 }
7193 }
7194
7195 if (buf_size_same) {
7196 if (!ring_buffer_expanded)
7197 r = sprintf(buf, "%lu (expanded: %lu)\n",
7198 size >> 10,
7199 trace_buf_size >> 10);
7200 else
7201 r = sprintf(buf, "%lu\n", size >> 10);
7202 } else
7203 r = sprintf(buf, "X\n");
7204 } else
7205 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7206
7207 mutex_unlock(&trace_types_lock);
7208
7209 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7210 return ret;
7211 }
7212
7213 static ssize_t
tracing_entries_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7214 tracing_entries_write(struct file *filp, const char __user *ubuf,
7215 size_t cnt, loff_t *ppos)
7216 {
7217 struct inode *inode = file_inode(filp);
7218 struct trace_array *tr = inode->i_private;
7219 unsigned long val;
7220 int ret;
7221
7222 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7223 if (ret)
7224 return ret;
7225
7226 /* must have at least 1 entry */
7227 if (!val)
7228 return -EINVAL;
7229
7230 /* value is in KB */
7231 val <<= 10;
7232 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7233 if (ret < 0)
7234 return ret;
7235
7236 *ppos += cnt;
7237
7238 return cnt;
7239 }
7240
7241 static ssize_t
tracing_total_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7242 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7243 size_t cnt, loff_t *ppos)
7244 {
7245 struct trace_array *tr = filp->private_data;
7246 char buf[64];
7247 int r, cpu;
7248 unsigned long size = 0, expanded_size = 0;
7249
7250 mutex_lock(&trace_types_lock);
7251 for_each_tracing_cpu(cpu) {
7252 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7253 if (!ring_buffer_expanded)
7254 expanded_size += trace_buf_size >> 10;
7255 }
7256 if (ring_buffer_expanded)
7257 r = sprintf(buf, "%lu\n", size);
7258 else
7259 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7260 mutex_unlock(&trace_types_lock);
7261
7262 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7263 }
7264
7265 static ssize_t
tracing_free_buffer_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7266 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7267 size_t cnt, loff_t *ppos)
7268 {
7269 /*
7270 * There is no need to read what the user has written, this function
7271 * is just to make sure that there is no error when "echo" is used
7272 */
7273
7274 *ppos += cnt;
7275
7276 return cnt;
7277 }
7278
7279 static int
tracing_free_buffer_release(struct inode * inode,struct file * filp)7280 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7281 {
7282 struct trace_array *tr = inode->i_private;
7283
7284 /* disable tracing ? */
7285 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7286 tracer_tracing_off(tr);
7287 /* resize the ring buffer to 0 */
7288 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7289
7290 trace_array_put(tr);
7291
7292 return 0;
7293 }
7294
7295 static ssize_t
tracing_mark_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7296 tracing_mark_write(struct file *filp, const char __user *ubuf,
7297 size_t cnt, loff_t *fpos)
7298 {
7299 struct trace_array *tr = filp->private_data;
7300 struct ring_buffer_event *event;
7301 enum event_trigger_type tt = ETT_NONE;
7302 struct trace_buffer *buffer;
7303 struct print_entry *entry;
7304 ssize_t written;
7305 int size;
7306 int len;
7307
7308 /* Used in tracing_mark_raw_write() as well */
7309 #define FAULTED_STR "<faulted>"
7310 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7311
7312 if (tracing_disabled)
7313 return -EINVAL;
7314
7315 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7316 return -EINVAL;
7317
7318 if (cnt > TRACE_BUF_SIZE)
7319 cnt = TRACE_BUF_SIZE;
7320
7321 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7322
7323 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7324
7325 /* If less than "<faulted>", then make sure we can still add that */
7326 if (cnt < FAULTED_SIZE)
7327 size += FAULTED_SIZE - cnt;
7328
7329 buffer = tr->array_buffer.buffer;
7330 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7331 tracing_gen_ctx());
7332 if (unlikely(!event))
7333 /* Ring buffer disabled, return as if not open for write */
7334 return -EBADF;
7335
7336 entry = ring_buffer_event_data(event);
7337 entry->ip = _THIS_IP_;
7338
7339 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7340 if (len) {
7341 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7342 cnt = FAULTED_SIZE;
7343 written = -EFAULT;
7344 } else
7345 written = cnt;
7346
7347 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7348 /* do not add \n before testing triggers, but add \0 */
7349 entry->buf[cnt] = '\0';
7350 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7351 }
7352
7353 if (entry->buf[cnt - 1] != '\n') {
7354 entry->buf[cnt] = '\n';
7355 entry->buf[cnt + 1] = '\0';
7356 } else
7357 entry->buf[cnt] = '\0';
7358
7359 if (static_branch_unlikely(&trace_marker_exports_enabled))
7360 ftrace_exports(event, TRACE_EXPORT_MARKER);
7361 __buffer_unlock_commit(buffer, event);
7362
7363 if (tt)
7364 event_triggers_post_call(tr->trace_marker_file, tt);
7365
7366 return written;
7367 }
7368
7369 /* Limit it for now to 3K (including tag) */
7370 #define RAW_DATA_MAX_SIZE (1024*3)
7371
7372 static ssize_t
tracing_mark_raw_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7373 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7374 size_t cnt, loff_t *fpos)
7375 {
7376 struct trace_array *tr = filp->private_data;
7377 struct ring_buffer_event *event;
7378 struct trace_buffer *buffer;
7379 struct raw_data_entry *entry;
7380 ssize_t written;
7381 int size;
7382 int len;
7383
7384 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7385
7386 if (tracing_disabled)
7387 return -EINVAL;
7388
7389 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7390 return -EINVAL;
7391
7392 /* The marker must at least have a tag id */
7393 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7394 return -EINVAL;
7395
7396 if (cnt > TRACE_BUF_SIZE)
7397 cnt = TRACE_BUF_SIZE;
7398
7399 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7400
7401 size = sizeof(*entry) + cnt;
7402 if (cnt < FAULT_SIZE_ID)
7403 size += FAULT_SIZE_ID - cnt;
7404
7405 buffer = tr->array_buffer.buffer;
7406 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7407 tracing_gen_ctx());
7408 if (!event)
7409 /* Ring buffer disabled, return as if not open for write */
7410 return -EBADF;
7411
7412 entry = ring_buffer_event_data(event);
7413
7414 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7415 if (len) {
7416 entry->id = -1;
7417 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7418 written = -EFAULT;
7419 } else
7420 written = cnt;
7421
7422 __buffer_unlock_commit(buffer, event);
7423
7424 return written;
7425 }
7426
tracing_clock_show(struct seq_file * m,void * v)7427 static int tracing_clock_show(struct seq_file *m, void *v)
7428 {
7429 struct trace_array *tr = m->private;
7430 int i;
7431
7432 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7433 seq_printf(m,
7434 "%s%s%s%s", i ? " " : "",
7435 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7436 i == tr->clock_id ? "]" : "");
7437 seq_putc(m, '\n');
7438
7439 return 0;
7440 }
7441
tracing_set_clock(struct trace_array * tr,const char * clockstr)7442 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7443 {
7444 int i;
7445
7446 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7447 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7448 break;
7449 }
7450 if (i == ARRAY_SIZE(trace_clocks))
7451 return -EINVAL;
7452
7453 mutex_lock(&trace_types_lock);
7454
7455 tr->clock_id = i;
7456
7457 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7458
7459 /*
7460 * New clock may not be consistent with the previous clock.
7461 * Reset the buffer so that it doesn't have incomparable timestamps.
7462 */
7463 tracing_reset_online_cpus(&tr->array_buffer);
7464
7465 #ifdef CONFIG_TRACER_MAX_TRACE
7466 if (tr->max_buffer.buffer)
7467 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7468 tracing_reset_online_cpus(&tr->max_buffer);
7469 #endif
7470
7471 mutex_unlock(&trace_types_lock);
7472
7473 return 0;
7474 }
7475
tracing_clock_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7476 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7477 size_t cnt, loff_t *fpos)
7478 {
7479 struct seq_file *m = filp->private_data;
7480 struct trace_array *tr = m->private;
7481 char buf[64];
7482 const char *clockstr;
7483 int ret;
7484
7485 if (cnt >= sizeof(buf))
7486 return -EINVAL;
7487
7488 if (copy_from_user(buf, ubuf, cnt))
7489 return -EFAULT;
7490
7491 buf[cnt] = 0;
7492
7493 clockstr = strstrip(buf);
7494
7495 ret = tracing_set_clock(tr, clockstr);
7496 if (ret)
7497 return ret;
7498
7499 *fpos += cnt;
7500
7501 return cnt;
7502 }
7503
tracing_clock_open(struct inode * inode,struct file * file)7504 static int tracing_clock_open(struct inode *inode, struct file *file)
7505 {
7506 struct trace_array *tr = inode->i_private;
7507 int ret;
7508
7509 ret = tracing_check_open_get_tr(tr);
7510 if (ret)
7511 return ret;
7512
7513 ret = single_open(file, tracing_clock_show, inode->i_private);
7514 if (ret < 0)
7515 trace_array_put(tr);
7516
7517 return ret;
7518 }
7519
tracing_time_stamp_mode_show(struct seq_file * m,void * v)7520 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7521 {
7522 struct trace_array *tr = m->private;
7523
7524 mutex_lock(&trace_types_lock);
7525
7526 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7527 seq_puts(m, "delta [absolute]\n");
7528 else
7529 seq_puts(m, "[delta] absolute\n");
7530
7531 mutex_unlock(&trace_types_lock);
7532
7533 return 0;
7534 }
7535
tracing_time_stamp_mode_open(struct inode * inode,struct file * file)7536 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7537 {
7538 struct trace_array *tr = inode->i_private;
7539 int ret;
7540
7541 ret = tracing_check_open_get_tr(tr);
7542 if (ret)
7543 return ret;
7544
7545 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7546 if (ret < 0)
7547 trace_array_put(tr);
7548
7549 return ret;
7550 }
7551
tracing_event_time_stamp(struct trace_buffer * buffer,struct ring_buffer_event * rbe)7552 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7553 {
7554 if (rbe == this_cpu_read(trace_buffered_event))
7555 return ring_buffer_time_stamp(buffer);
7556
7557 return ring_buffer_event_time_stamp(buffer, rbe);
7558 }
7559
7560 /*
7561 * Set or disable using the per CPU trace_buffer_event when possible.
7562 */
tracing_set_filter_buffering(struct trace_array * tr,bool set)7563 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7564 {
7565 int ret = 0;
7566
7567 mutex_lock(&trace_types_lock);
7568
7569 if (set && tr->no_filter_buffering_ref++)
7570 goto out;
7571
7572 if (!set) {
7573 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7574 ret = -EINVAL;
7575 goto out;
7576 }
7577
7578 --tr->no_filter_buffering_ref;
7579 }
7580 out:
7581 mutex_unlock(&trace_types_lock);
7582
7583 return ret;
7584 }
7585
7586 struct ftrace_buffer_info {
7587 struct trace_iterator iter;
7588 void *spare;
7589 unsigned int spare_cpu;
7590 unsigned int read;
7591 };
7592
7593 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_open(struct inode * inode,struct file * file)7594 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7595 {
7596 struct trace_array *tr = inode->i_private;
7597 struct trace_iterator *iter;
7598 struct seq_file *m;
7599 int ret;
7600
7601 ret = tracing_check_open_get_tr(tr);
7602 if (ret)
7603 return ret;
7604
7605 if (file->f_mode & FMODE_READ) {
7606 iter = __tracing_open(inode, file, true);
7607 if (IS_ERR(iter))
7608 ret = PTR_ERR(iter);
7609 } else {
7610 /* Writes still need the seq_file to hold the private data */
7611 ret = -ENOMEM;
7612 m = kzalloc(sizeof(*m), GFP_KERNEL);
7613 if (!m)
7614 goto out;
7615 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7616 if (!iter) {
7617 kfree(m);
7618 goto out;
7619 }
7620 ret = 0;
7621
7622 iter->tr = tr;
7623 iter->array_buffer = &tr->max_buffer;
7624 iter->cpu_file = tracing_get_cpu(inode);
7625 m->private = iter;
7626 file->private_data = m;
7627 }
7628 out:
7629 if (ret < 0)
7630 trace_array_put(tr);
7631
7632 return ret;
7633 }
7634
tracing_swap_cpu_buffer(void * tr)7635 static void tracing_swap_cpu_buffer(void *tr)
7636 {
7637 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7638 }
7639
7640 static ssize_t
tracing_snapshot_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7641 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7642 loff_t *ppos)
7643 {
7644 struct seq_file *m = filp->private_data;
7645 struct trace_iterator *iter = m->private;
7646 struct trace_array *tr = iter->tr;
7647 unsigned long val;
7648 int ret;
7649
7650 ret = tracing_update_buffers();
7651 if (ret < 0)
7652 return ret;
7653
7654 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7655 if (ret)
7656 return ret;
7657
7658 mutex_lock(&trace_types_lock);
7659
7660 if (tr->current_trace->use_max_tr) {
7661 ret = -EBUSY;
7662 goto out;
7663 }
7664
7665 local_irq_disable();
7666 arch_spin_lock(&tr->max_lock);
7667 if (tr->cond_snapshot)
7668 ret = -EBUSY;
7669 arch_spin_unlock(&tr->max_lock);
7670 local_irq_enable();
7671 if (ret)
7672 goto out;
7673
7674 switch (val) {
7675 case 0:
7676 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7677 ret = -EINVAL;
7678 break;
7679 }
7680 if (tr->allocated_snapshot)
7681 free_snapshot(tr);
7682 break;
7683 case 1:
7684 /* Only allow per-cpu swap if the ring buffer supports it */
7685 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7686 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7687 ret = -EINVAL;
7688 break;
7689 }
7690 #endif
7691 if (tr->allocated_snapshot)
7692 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7693 &tr->array_buffer, iter->cpu_file);
7694 else
7695 ret = tracing_alloc_snapshot_instance(tr);
7696 if (ret < 0)
7697 break;
7698 /* Now, we're going to swap */
7699 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7700 local_irq_disable();
7701 update_max_tr(tr, current, smp_processor_id(), NULL);
7702 local_irq_enable();
7703 } else {
7704 smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7705 (void *)tr, 1);
7706 }
7707 break;
7708 default:
7709 if (tr->allocated_snapshot) {
7710 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7711 tracing_reset_online_cpus(&tr->max_buffer);
7712 else
7713 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7714 }
7715 break;
7716 }
7717
7718 if (ret >= 0) {
7719 *ppos += cnt;
7720 ret = cnt;
7721 }
7722 out:
7723 mutex_unlock(&trace_types_lock);
7724 return ret;
7725 }
7726
tracing_snapshot_release(struct inode * inode,struct file * file)7727 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7728 {
7729 struct seq_file *m = file->private_data;
7730 int ret;
7731
7732 ret = tracing_release(inode, file);
7733
7734 if (file->f_mode & FMODE_READ)
7735 return ret;
7736
7737 /* If write only, the seq_file is just a stub */
7738 if (m)
7739 kfree(m->private);
7740 kfree(m);
7741
7742 return 0;
7743 }
7744
7745 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7746 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7747 size_t count, loff_t *ppos);
7748 static int tracing_buffers_release(struct inode *inode, struct file *file);
7749 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7750 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7751
snapshot_raw_open(struct inode * inode,struct file * filp)7752 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7753 {
7754 struct ftrace_buffer_info *info;
7755 int ret;
7756
7757 /* The following checks for tracefs lockdown */
7758 ret = tracing_buffers_open(inode, filp);
7759 if (ret < 0)
7760 return ret;
7761
7762 info = filp->private_data;
7763
7764 if (info->iter.trace->use_max_tr) {
7765 tracing_buffers_release(inode, filp);
7766 return -EBUSY;
7767 }
7768
7769 info->iter.snapshot = true;
7770 info->iter.array_buffer = &info->iter.tr->max_buffer;
7771
7772 return ret;
7773 }
7774
7775 #endif /* CONFIG_TRACER_SNAPSHOT */
7776
7777
7778 static const struct file_operations tracing_thresh_fops = {
7779 .open = tracing_open_generic,
7780 .read = tracing_thresh_read,
7781 .write = tracing_thresh_write,
7782 .llseek = generic_file_llseek,
7783 };
7784
7785 #ifdef CONFIG_TRACER_MAX_TRACE
7786 static const struct file_operations tracing_max_lat_fops = {
7787 .open = tracing_open_generic_tr,
7788 .read = tracing_max_lat_read,
7789 .write = tracing_max_lat_write,
7790 .llseek = generic_file_llseek,
7791 .release = tracing_release_generic_tr,
7792 };
7793 #endif
7794
7795 static const struct file_operations set_tracer_fops = {
7796 .open = tracing_open_generic_tr,
7797 .read = tracing_set_trace_read,
7798 .write = tracing_set_trace_write,
7799 .llseek = generic_file_llseek,
7800 .release = tracing_release_generic_tr,
7801 };
7802
7803 static const struct file_operations tracing_pipe_fops = {
7804 .open = tracing_open_pipe,
7805 .poll = tracing_poll_pipe,
7806 .read = tracing_read_pipe,
7807 .splice_read = tracing_splice_read_pipe,
7808 .release = tracing_release_pipe,
7809 .llseek = no_llseek,
7810 };
7811
7812 static const struct file_operations tracing_entries_fops = {
7813 .open = tracing_open_generic_tr,
7814 .read = tracing_entries_read,
7815 .write = tracing_entries_write,
7816 .llseek = generic_file_llseek,
7817 .release = tracing_release_generic_tr,
7818 };
7819
7820 static const struct file_operations tracing_total_entries_fops = {
7821 .open = tracing_open_generic_tr,
7822 .read = tracing_total_entries_read,
7823 .llseek = generic_file_llseek,
7824 .release = tracing_release_generic_tr,
7825 };
7826
7827 static const struct file_operations tracing_free_buffer_fops = {
7828 .open = tracing_open_generic_tr,
7829 .write = tracing_free_buffer_write,
7830 .release = tracing_free_buffer_release,
7831 };
7832
7833 static const struct file_operations tracing_mark_fops = {
7834 .open = tracing_mark_open,
7835 .write = tracing_mark_write,
7836 .release = tracing_release_generic_tr,
7837 };
7838
7839 static const struct file_operations tracing_mark_raw_fops = {
7840 .open = tracing_mark_open,
7841 .write = tracing_mark_raw_write,
7842 .release = tracing_release_generic_tr,
7843 };
7844
7845 static const struct file_operations trace_clock_fops = {
7846 .open = tracing_clock_open,
7847 .read = seq_read,
7848 .llseek = seq_lseek,
7849 .release = tracing_single_release_tr,
7850 .write = tracing_clock_write,
7851 };
7852
7853 static const struct file_operations trace_time_stamp_mode_fops = {
7854 .open = tracing_time_stamp_mode_open,
7855 .read = seq_read,
7856 .llseek = seq_lseek,
7857 .release = tracing_single_release_tr,
7858 };
7859
7860 #ifdef CONFIG_TRACER_SNAPSHOT
7861 static const struct file_operations snapshot_fops = {
7862 .open = tracing_snapshot_open,
7863 .read = seq_read,
7864 .write = tracing_snapshot_write,
7865 .llseek = tracing_lseek,
7866 .release = tracing_snapshot_release,
7867 };
7868
7869 static const struct file_operations snapshot_raw_fops = {
7870 .open = snapshot_raw_open,
7871 .read = tracing_buffers_read,
7872 .release = tracing_buffers_release,
7873 .splice_read = tracing_buffers_splice_read,
7874 .llseek = no_llseek,
7875 };
7876
7877 #endif /* CONFIG_TRACER_SNAPSHOT */
7878
7879 /*
7880 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7881 * @filp: The active open file structure
7882 * @ubuf: The userspace provided buffer to read value into
7883 * @cnt: The maximum number of bytes to read
7884 * @ppos: The current "file" position
7885 *
7886 * This function implements the write interface for a struct trace_min_max_param.
7887 * The filp->private_data must point to a trace_min_max_param structure that
7888 * defines where to write the value, the min and the max acceptable values,
7889 * and a lock to protect the write.
7890 */
7891 static ssize_t
trace_min_max_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7892 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7893 {
7894 struct trace_min_max_param *param = filp->private_data;
7895 u64 val;
7896 int err;
7897
7898 if (!param)
7899 return -EFAULT;
7900
7901 err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7902 if (err)
7903 return err;
7904
7905 if (param->lock)
7906 mutex_lock(param->lock);
7907
7908 if (param->min && val < *param->min)
7909 err = -EINVAL;
7910
7911 if (param->max && val > *param->max)
7912 err = -EINVAL;
7913
7914 if (!err)
7915 *param->val = val;
7916
7917 if (param->lock)
7918 mutex_unlock(param->lock);
7919
7920 if (err)
7921 return err;
7922
7923 return cnt;
7924 }
7925
7926 /*
7927 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7928 * @filp: The active open file structure
7929 * @ubuf: The userspace provided buffer to read value into
7930 * @cnt: The maximum number of bytes to read
7931 * @ppos: The current "file" position
7932 *
7933 * This function implements the read interface for a struct trace_min_max_param.
7934 * The filp->private_data must point to a trace_min_max_param struct with valid
7935 * data.
7936 */
7937 static ssize_t
trace_min_max_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7938 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7939 {
7940 struct trace_min_max_param *param = filp->private_data;
7941 char buf[U64_STR_SIZE];
7942 int len;
7943 u64 val;
7944
7945 if (!param)
7946 return -EFAULT;
7947
7948 val = *param->val;
7949
7950 if (cnt > sizeof(buf))
7951 cnt = sizeof(buf);
7952
7953 len = snprintf(buf, sizeof(buf), "%llu\n", val);
7954
7955 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7956 }
7957
7958 const struct file_operations trace_min_max_fops = {
7959 .open = tracing_open_generic,
7960 .read = trace_min_max_read,
7961 .write = trace_min_max_write,
7962 };
7963
7964 #define TRACING_LOG_ERRS_MAX 8
7965 #define TRACING_LOG_LOC_MAX 128
7966
7967 #define CMD_PREFIX " Command: "
7968
7969 struct err_info {
7970 const char **errs; /* ptr to loc-specific array of err strings */
7971 u8 type; /* index into errs -> specific err string */
7972 u16 pos; /* caret position */
7973 u64 ts;
7974 };
7975
7976 struct tracing_log_err {
7977 struct list_head list;
7978 struct err_info info;
7979 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7980 char *cmd; /* what caused err */
7981 };
7982
7983 static DEFINE_MUTEX(tracing_err_log_lock);
7984
alloc_tracing_log_err(int len)7985 static struct tracing_log_err *alloc_tracing_log_err(int len)
7986 {
7987 struct tracing_log_err *err;
7988
7989 err = kzalloc(sizeof(*err), GFP_KERNEL);
7990 if (!err)
7991 return ERR_PTR(-ENOMEM);
7992
7993 err->cmd = kzalloc(len, GFP_KERNEL);
7994 if (!err->cmd) {
7995 kfree(err);
7996 return ERR_PTR(-ENOMEM);
7997 }
7998
7999 return err;
8000 }
8001
free_tracing_log_err(struct tracing_log_err * err)8002 static void free_tracing_log_err(struct tracing_log_err *err)
8003 {
8004 kfree(err->cmd);
8005 kfree(err);
8006 }
8007
get_tracing_log_err(struct trace_array * tr,int len)8008 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8009 int len)
8010 {
8011 struct tracing_log_err *err;
8012 char *cmd;
8013
8014 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8015 err = alloc_tracing_log_err(len);
8016 if (PTR_ERR(err) != -ENOMEM)
8017 tr->n_err_log_entries++;
8018
8019 return err;
8020 }
8021 cmd = kzalloc(len, GFP_KERNEL);
8022 if (!cmd)
8023 return ERR_PTR(-ENOMEM);
8024 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8025 kfree(err->cmd);
8026 err->cmd = cmd;
8027 list_del(&err->list);
8028
8029 return err;
8030 }
8031
/**
 * err_pos - find the position of a string within a command for error careting
 * @cmd: The tracing command that caused the error
 * @str: The string to position the caret at within @cmd
 *
 * Finds the position of the first occurrence of @str within @cmd.  The
 * return value can be passed to tracing_log_err() for caret placement
 * within @cmd.  An empty @cmd triggers a warning.
 *
 * Returns the index within @cmd of the first occurrence of @str or 0
 * if @str was not found.
 */
unsigned int err_pos(char *cmd, const char *str)
{
	char *hit;

	if (WARN_ON(!strlen(cmd)))
		return 0;

	hit = strstr(cmd, str);
	if (!hit)
		return 0;

	return hit - cmd;
}
8057
8058 /**
8059 * tracing_log_err - write an error to the tracing error log
8060 * @tr: The associated trace array for the error (NULL for top level array)
8061 * @loc: A string describing where the error occurred
8062 * @cmd: The tracing command that caused the error
8063 * @errs: The array of loc-specific static error strings
8064 * @type: The index into errs[], which produces the specific static err string
8065 * @pos: The position the caret should be placed in the cmd
8066 *
8067 * Writes an error into tracing/error_log of the form:
8068 *
8069 * <loc>: error: <text>
8070 * Command: <cmd>
8071 * ^
8072 *
8073 * tracing/error_log is a small log file containing the last
8074 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
8075 * unless there has been a tracing error, and the error log can be
8076 * cleared and have its memory freed by writing the empty string in
8077 * truncation mode to it i.e. echo > tracing/error_log.
8078 *
8079 * NOTE: the @errs array along with the @type param are used to
8080 * produce a static error string - this string is not copied and saved
8081 * when the error is logged - only a pointer to it is saved. See
8082 * existing callers for examples of how static strings are typically
8083 * defined for use with tracing_log_err().
8084 */
tracing_log_err(struct trace_array * tr,const char * loc,const char * cmd,const char ** errs,u8 type,u16 pos)8085 void tracing_log_err(struct trace_array *tr,
8086 const char *loc, const char *cmd,
8087 const char **errs, u8 type, u16 pos)
8088 {
8089 struct tracing_log_err *err;
8090 int len = 0;
8091
8092 if (!tr)
8093 tr = &global_trace;
8094
8095 len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8096
8097 mutex_lock(&tracing_err_log_lock);
8098 err = get_tracing_log_err(tr, len);
8099 if (PTR_ERR(err) == -ENOMEM) {
8100 mutex_unlock(&tracing_err_log_lock);
8101 return;
8102 }
8103
8104 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8105 snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8106
8107 err->info.errs = errs;
8108 err->info.type = type;
8109 err->info.pos = pos;
8110 err->info.ts = local_clock();
8111
8112 list_add_tail(&err->list, &tr->err_log);
8113 mutex_unlock(&tracing_err_log_lock);
8114 }
8115
clear_tracing_err_log(struct trace_array * tr)8116 static void clear_tracing_err_log(struct trace_array *tr)
8117 {
8118 struct tracing_log_err *err, *next;
8119
8120 mutex_lock(&tracing_err_log_lock);
8121 list_for_each_entry_safe(err, next, &tr->err_log, list) {
8122 list_del(&err->list);
8123 free_tracing_log_err(err);
8124 }
8125
8126 tr->n_err_log_entries = 0;
8127 mutex_unlock(&tracing_err_log_lock);
8128 }
8129
/*
 * seq_file ->start: take tracing_err_log_lock and position on entry *pos
 * of the error log.  The lock is released in tracing_err_log_seq_stop().
 */
static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct list_head *log = &tr->err_log;

	mutex_lock(&tracing_err_log_lock);

	return seq_list_start(log, *pos);
}
8138
/* seq_file ->next: advance to the following entry of the error log. */
static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct list_head *log = &tr->err_log;

	return seq_list_next(v, log, pos);
}
8145
tracing_err_log_seq_stop(struct seq_file * m,void * v)8146 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8147 {
8148 mutex_unlock(&tracing_err_log_lock);
8149 }
8150
/*
 * Print the caret line: enough spaces to skip CMD_PREFIX and @pos
 * characters of the command, then "^".
 */
static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
{
	unsigned int pad = sizeof(CMD_PREFIX) - 1 + pos;

	while (pad--)
		seq_putc(m, ' ');
	seq_puts(m, "^\n");
}
8161
tracing_err_log_seq_show(struct seq_file * m,void * v)8162 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8163 {
8164 struct tracing_log_err *err = v;
8165
8166 if (err) {
8167 const char *err_text = err->info.errs[err->info.type];
8168 u64 sec = err->info.ts;
8169 u32 nsec;
8170
8171 nsec = do_div(sec, NSEC_PER_SEC);
8172 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8173 err->loc, err_text);
8174 seq_printf(m, "%s", err->cmd);
8175 tracing_err_log_show_pos(m, err->info.pos);
8176 }
8177
8178 return 0;
8179 }
8180
8181 static const struct seq_operations tracing_err_log_seq_ops = {
8182 .start = tracing_err_log_seq_start,
8183 .next = tracing_err_log_seq_next,
8184 .stop = tracing_err_log_seq_stop,
8185 .show = tracing_err_log_seq_show
8186 };
8187
tracing_err_log_open(struct inode * inode,struct file * file)8188 static int tracing_err_log_open(struct inode *inode, struct file *file)
8189 {
8190 struct trace_array *tr = inode->i_private;
8191 int ret = 0;
8192
8193 ret = tracing_check_open_get_tr(tr);
8194 if (ret)
8195 return ret;
8196
8197 /* If this file was opened for write, then erase contents */
8198 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8199 clear_tracing_err_log(tr);
8200
8201 if (file->f_mode & FMODE_READ) {
8202 ret = seq_open(file, &tracing_err_log_seq_ops);
8203 if (!ret) {
8204 struct seq_file *m = file->private_data;
8205 m->private = tr;
8206 } else {
8207 trace_array_put(tr);
8208 }
8209 }
8210 return ret;
8211 }
8212
tracing_err_log_write(struct file * file,const char __user * buffer,size_t count,loff_t * ppos)8213 static ssize_t tracing_err_log_write(struct file *file,
8214 const char __user *buffer,
8215 size_t count, loff_t *ppos)
8216 {
8217 return count;
8218 }
8219
tracing_err_log_release(struct inode * inode,struct file * file)8220 static int tracing_err_log_release(struct inode *inode, struct file *file)
8221 {
8222 struct trace_array *tr = inode->i_private;
8223
8224 trace_array_put(tr);
8225
8226 if (file->f_mode & FMODE_READ)
8227 seq_release(inode, file);
8228
8229 return 0;
8230 }
8231
8232 static const struct file_operations tracing_err_log_fops = {
8233 .open = tracing_err_log_open,
8234 .write = tracing_err_log_write,
8235 .read = seq_read,
8236 .llseek = tracing_lseek,
8237 .release = tracing_err_log_release,
8238 };
8239
tracing_buffers_open(struct inode * inode,struct file * filp)8240 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8241 {
8242 struct trace_array *tr = inode->i_private;
8243 struct ftrace_buffer_info *info;
8244 int ret;
8245
8246 ret = tracing_check_open_get_tr(tr);
8247 if (ret)
8248 return ret;
8249
8250 info = kvzalloc(sizeof(*info), GFP_KERNEL);
8251 if (!info) {
8252 trace_array_put(tr);
8253 return -ENOMEM;
8254 }
8255
8256 mutex_lock(&trace_types_lock);
8257
8258 info->iter.tr = tr;
8259 info->iter.cpu_file = tracing_get_cpu(inode);
8260 info->iter.trace = tr->current_trace;
8261 info->iter.array_buffer = &tr->array_buffer;
8262 info->spare = NULL;
8263 /* Force reading ring buffer for first read */
8264 info->read = (unsigned int)-1;
8265
8266 filp->private_data = info;
8267
8268 tr->trace_ref++;
8269
8270 mutex_unlock(&trace_types_lock);
8271
8272 ret = nonseekable_open(inode, filp);
8273 if (ret < 0)
8274 trace_array_put(tr);
8275
8276 return ret;
8277 }
8278
8279 static __poll_t
tracing_buffers_poll(struct file * filp,poll_table * poll_table)8280 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8281 {
8282 struct ftrace_buffer_info *info = filp->private_data;
8283 struct trace_iterator *iter = &info->iter;
8284
8285 return trace_poll(iter, filp, poll_table);
8286 }
8287
/*
 * Read handler for the raw ring-buffer files.
 *
 * Data is pulled out of the ring buffer one page at a time into info->spare
 * and copied to user space from there.  info->read records how much of the
 * spare page has already been handed out, so a single page can be consumed
 * over several read() calls.
 *
 * Returns the number of bytes copied; 0 when @count is zero or when no page
 * could be read although the buffer is not empty; or a negative errno
 * (-EBUSY, -EAGAIN, -EFAULT, or the error from the page allocation or from
 * wait_on_pipe()).
 */
static ssize_t
tracing_buffers_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	ssize_t ret = 0;
	ssize_t size;

	if (!count)
		return 0;

#ifdef CONFIG_TRACER_MAX_TRACE
	/* Refuse snapshot reads while the current tracer has use_max_tr set */
	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
		return -EBUSY;
#endif

	/* Allocate the spare page on first use and remember its cpu */
	if (!info->spare) {
		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
							  iter->cpu_file);
		if (IS_ERR(info->spare)) {
			ret = PTR_ERR(info->spare);
			info->spare = NULL;
		} else {
			info->spare_cpu = iter->cpu_file;
		}
	}
	/* Allocation failed: ret holds the error taken from the ERR_PTR */
	if (!info->spare)
		return ret;

	/* Do we have previous read data to read? */
	if (info->read < PAGE_SIZE)
		goto read;

 again:
	trace_access_lock(iter->cpu_file);
	ret = ring_buffer_read_page(iter->array_buffer->buffer,
				    &info->spare,
				    count,
				    iter->cpu_file, 0);
	trace_access_unlock(iter->cpu_file);

	if (ret < 0) {
		if (trace_empty(iter)) {
			if ((filp->f_flags & O_NONBLOCK))
				return -EAGAIN;

			/* Buffer is empty: wait, then retry the page read */
			ret = wait_on_pipe(iter, 0);
			if (ret)
				return ret;

			goto again;
		}
		/* Page read failed although the buffer is not empty */
		return 0;
	}

	/* A fresh page was read; start handing it out from its beginning */
	info->read = 0;
 read:
	size = PAGE_SIZE - info->read;
	if (size > count)
		size = count;

	/* Fail only if nothing at all was copied; otherwise report a short read */
	ret = copy_to_user(ubuf, info->spare + info->read, size);
	if (ret == size)
		return -EFAULT;

	size -= ret;

	*ppos += size;
	info->read += size;

	return size;
}
8361
/*
 * Flush handler for the raw ring-buffer files: bump the iterator's
 * wait_index and wake up the waiters of this file's cpu buffer.
 */
static int tracing_buffers_flush(struct file *file, fl_owner_t id)
{
	struct ftrace_buffer_info *binfo = file->private_data;
	struct trace_iterator *it = &binfo->iter;

	it->wait_index++;
	/* Make sure the waiters see the new wait_index */
	smp_wmb();

	ring_buffer_wake_waiters(it->array_buffer->buffer, it->cpu_file);

	return 0;
}
8375
tracing_buffers_release(struct inode * inode,struct file * file)8376 static int tracing_buffers_release(struct inode *inode, struct file *file)
8377 {
8378 struct ftrace_buffer_info *info = file->private_data;
8379 struct trace_iterator *iter = &info->iter;
8380
8381 mutex_lock(&trace_types_lock);
8382
8383 iter->tr->trace_ref--;
8384
8385 __trace_array_put(iter->tr);
8386
8387 if (info->spare)
8388 ring_buffer_free_read_page(iter->array_buffer->buffer,
8389 info->spare_cpu, info->spare);
8390 kvfree(info);
8391
8392 mutex_unlock(&trace_types_lock);
8393
8394 return 0;
8395 }
8396
/*
 * Reference-counted handle on one ring-buffer read page.  When the last
 * reference is dropped, buffer_ref_release() hands the page back with
 * ring_buffer_free_read_page() and frees this descriptor.
 */
struct buffer_ref {
	struct trace_buffer	*buffer;	/* ring buffer the page is returned to */
	void			*page;		/* the read page itself */
	int			cpu;		/* cpu passed along when freeing the page */
	refcount_t		refcount;	/* number of holders of this handle */
};
8403
buffer_ref_release(struct buffer_ref * ref)8404 static void buffer_ref_release(struct buffer_ref *ref)
8405 {
8406 if (!refcount_dec_and_test(&ref->refcount))
8407 return;
8408 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8409 kfree(ref);
8410 }
8411
buffer_pipe_buf_release(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8412 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8413 struct pipe_buffer *buf)
8414 {
8415 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8416
8417 buffer_ref_release(ref);
8418 buf->private = 0;
8419 }
8420
buffer_pipe_buf_get(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8421 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8422 struct pipe_buffer *buf)
8423 {
8424 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8425
8426 if (refcount_read(&ref->refcount) > INT_MAX/2)
8427 return false;
8428
8429 refcount_inc(&ref->refcount);
8430 return true;
8431 }
8432
8433 /* Pipe buffer operations for a buffer. */
8434 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8435 .release = buffer_pipe_buf_release,
8436 .get = buffer_pipe_buf_get,
8437 };
8438
8439 /*
8440 * Callback from splice_to_pipe(), if we need to release some pages
8441 * at the end of the spd in case we error'ed out in filling the pipe.
8442 */
buffer_spd_release(struct splice_pipe_desc * spd,unsigned int i)8443 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8444 {
8445 struct buffer_ref *ref =
8446 (struct buffer_ref *)spd->partial[i].private;
8447
8448 buffer_ref_release(ref);
8449 spd->partial[i].private = 0;
8450 }
8451
/*
 * splice_read handler for the raw ring-buffer files.
 *
 * Whole ring-buffer pages are read into freshly allocated read pages, each
 * wrapped in a buffer_ref with an initial reference count of one, and are
 * then handed to the pipe with splice_to_pipe().
 *
 * *ppos must be page aligned and @len must be at least PAGE_SIZE (it is
 * rounded down to a multiple of PAGE_SIZE); otherwise -EINVAL is returned.
 * When no page could be collected the call either fails (-EAGAIN for
 * non-blocking callers, or an earlier allocation error) or waits on the
 * buffer and retries.
 *
 * Returns the result of splice_to_pipe(), or a negative errno / the
 * wait_on_pipe() result on the early-exit paths.
 */
static ssize_t
tracing_buffers_splice_read(struct file *file, loff_t *ppos,
			    struct pipe_inode_info *pipe, size_t len,
			    unsigned int flags)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &buffer_pipe_buf_ops,
		.spd_release	= buffer_spd_release,
	};
	struct buffer_ref *ref;
	int entries, i;
	ssize_t ret = 0;

#ifdef CONFIG_TRACER_MAX_TRACE
	/* Refuse snapshot reads while the current tracer has use_max_tr set */
	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
		return -EBUSY;
#endif

	/* The file position must sit on a page boundary */
	if (*ppos & (PAGE_SIZE - 1))
		return -EINVAL;

	/* Only whole pages are transferred: round len down, reject < 1 page */
	if (len & (PAGE_SIZE - 1)) {
		if (len < PAGE_SIZE)
			return -EINVAL;
		len &= PAGE_MASK;
	}

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

 again:
	trace_access_lock(iter->cpu_file);
	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);

	/* Collect pages until the spd is full, len is used up or no entries remain */
	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
		struct page *page;
		int r;

		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
		if (!ref) {
			ret = -ENOMEM;
			break;
		}

		refcount_set(&ref->refcount, 1);
		ref->buffer = iter->array_buffer->buffer;
		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
		if (IS_ERR(ref->page)) {
			ret = PTR_ERR(ref->page);
			ref->page = NULL;
			kfree(ref);
			break;
		}
		ref->cpu = iter->cpu_file;

		r = ring_buffer_read_page(ref->buffer, &ref->page,
					  len, iter->cpu_file, 1);
		if (r < 0) {
			/* Nothing was read into this page: give it back and stop */
			ring_buffer_free_read_page(ref->buffer, ref->cpu,
						   ref->page);
			kfree(ref);
			break;
		}

		page = virt_to_page(ref->page);

		/* Queue the full page; the ref travels in the private field */
		spd.pages[i] = page;
		spd.partial[i].len = PAGE_SIZE;
		spd.partial[i].offset = 0;
		spd.partial[i].private = (unsigned long)ref;
		spd.nr_pages++;
		*ppos += PAGE_SIZE;

		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
	}

	trace_access_unlock(iter->cpu_file);
	/* i counts the pages queued above; it overrides the running count */
	spd.nr_pages = i;

	/* did we read anything? */
	if (!spd.nr_pages) {
		long wait_index;

		/* An allocation error from the loop takes precedence */
		if (ret)
			goto out;

		ret = -EAGAIN;
		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
			goto out;

		/* Sample wait_index before sleeping so a later bump is noticed */
		wait_index = READ_ONCE(iter->wait_index);

		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
		if (ret)
			goto out;

		/* No need to wait after waking up when tracing is off */
		if (!tracer_tracing_is_on(iter->tr))
			goto out;

		/* Make sure we see the new wait_index */
		smp_rmb();
		if (wait_index != iter->wait_index)
			goto out;

		goto again;
	}

	ret = splice_to_pipe(pipe, &spd);
out:
	splice_shrink_spd(&spd);

	return ret;
}
8573
8574 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
tracing_buffers_ioctl(struct file * file,unsigned int cmd,unsigned long arg)8575 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8576 {
8577 struct ftrace_buffer_info *info = file->private_data;
8578 struct trace_iterator *iter = &info->iter;
8579
8580 if (cmd)
8581 return -ENOIOCTLCMD;
8582
8583 mutex_lock(&trace_types_lock);
8584
8585 iter->wait_index++;
8586 /* Make sure the waiters see the new wait_index */
8587 smp_wmb();
8588
8589 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8590
8591 mutex_unlock(&trace_types_lock);
8592 return 0;
8593 }
8594
8595 static const struct file_operations tracing_buffers_fops = {
8596 .open = tracing_buffers_open,
8597 .read = tracing_buffers_read,
8598 .poll = tracing_buffers_poll,
8599 .release = tracing_buffers_release,
8600 .flush = tracing_buffers_flush,
8601 .splice_read = tracing_buffers_splice_read,
8602 .unlocked_ioctl = tracing_buffers_ioctl,
8603 .llseek = no_llseek,
8604 };
8605
8606 static ssize_t
tracing_stats_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)8607 tracing_stats_read(struct file *filp, char __user *ubuf,
8608 size_t count, loff_t *ppos)
8609 {
8610 struct inode *inode = file_inode(filp);
8611 struct trace_array *tr = inode->i_private;
8612 struct array_buffer *trace_buf = &tr->array_buffer;
8613 int cpu = tracing_get_cpu(inode);
8614 struct trace_seq *s;
8615 unsigned long cnt;
8616 unsigned long long t;
8617 unsigned long usec_rem;
8618
8619 s = kmalloc(sizeof(*s), GFP_KERNEL);
8620 if (!s)
8621 return -ENOMEM;
8622
8623 trace_seq_init(s);
8624
8625 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8626 trace_seq_printf(s, "entries: %ld\n", cnt);
8627
8628 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8629 trace_seq_printf(s, "overrun: %ld\n", cnt);
8630
8631 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8632 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8633
8634 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8635 trace_seq_printf(s, "bytes: %ld\n", cnt);
8636
8637 if (trace_clocks[tr->clock_id].in_ns) {
8638 /* local or global for trace_clock */
8639 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8640 usec_rem = do_div(t, USEC_PER_SEC);
8641 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8642 t, usec_rem);
8643
8644 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8645 usec_rem = do_div(t, USEC_PER_SEC);
8646 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8647 } else {
8648 /* counter or tsc mode for trace_clock */
8649 trace_seq_printf(s, "oldest event ts: %llu\n",
8650 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8651
8652 trace_seq_printf(s, "now ts: %llu\n",
8653 ring_buffer_time_stamp(trace_buf->buffer));
8654 }
8655
8656 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8657 trace_seq_printf(s, "dropped events: %ld\n", cnt);
8658
8659 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8660 trace_seq_printf(s, "read events: %ld\n", cnt);
8661
8662 count = simple_read_from_buffer(ubuf, count, ppos,
8663 s->buffer, trace_seq_used(s));
8664
8665 kfree(s);
8666
8667 return count;
8668 }
8669
8670 static const struct file_operations tracing_stats_fops = {
8671 .open = tracing_open_generic_tr,
8672 .read = tracing_stats_read,
8673 .llseek = generic_file_llseek,
8674 .release = tracing_release_generic_tr,
8675 };
8676
8677 #ifdef CONFIG_DYNAMIC_FTRACE
8678
8679 static ssize_t
tracing_read_dyn_info(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8680 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8681 size_t cnt, loff_t *ppos)
8682 {
8683 ssize_t ret;
8684 char *buf;
8685 int r;
8686
8687 /* 256 should be plenty to hold the amount needed */
8688 buf = kmalloc(256, GFP_KERNEL);
8689 if (!buf)
8690 return -ENOMEM;
8691
8692 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8693 ftrace_update_tot_cnt,
8694 ftrace_number_of_pages,
8695 ftrace_number_of_groups);
8696
8697 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8698 kfree(buf);
8699 return ret;
8700 }
8701
8702 static const struct file_operations tracing_dyn_info_fops = {
8703 .open = tracing_open_generic,
8704 .read = tracing_read_dyn_info,
8705 .llseek = generic_file_llseek,
8706 };
8707 #endif /* CONFIG_DYNAMIC_FTRACE */
8708
8709 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
/* Probe callback without a counter: snapshot @tr on every hit */
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
		struct trace_array *tr, struct ftrace_probe_ops *ops,
		void *data)
{
	tracing_snapshot_instance(tr);
}
8717
8718 static void
ftrace_count_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)8719 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8720 struct trace_array *tr, struct ftrace_probe_ops *ops,
8721 void *data)
8722 {
8723 struct ftrace_func_mapper *mapper = data;
8724 long *count = NULL;
8725
8726 if (mapper)
8727 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8728
8729 if (count) {
8730
8731 if (*count <= 0)
8732 return;
8733
8734 (*count)--;
8735 }
8736
8737 tracing_snapshot_instance(tr);
8738 }
8739
8740 static int
ftrace_snapshot_print(struct seq_file * m,unsigned long ip,struct ftrace_probe_ops * ops,void * data)8741 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8742 struct ftrace_probe_ops *ops, void *data)
8743 {
8744 struct ftrace_func_mapper *mapper = data;
8745 long *count = NULL;
8746
8747 seq_printf(m, "%ps:", (void *)ip);
8748
8749 seq_puts(m, "snapshot");
8750
8751 if (mapper)
8752 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8753
8754 if (count)
8755 seq_printf(m, ":count=%ld\n", *count);
8756 else
8757 seq_puts(m, ":unlimited\n");
8758
8759 return 0;
8760 }
8761
8762 static int
ftrace_snapshot_init(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * init_data,void ** data)8763 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8764 unsigned long ip, void *init_data, void **data)
8765 {
8766 struct ftrace_func_mapper *mapper = *data;
8767
8768 if (!mapper) {
8769 mapper = allocate_ftrace_func_mapper();
8770 if (!mapper)
8771 return -ENOMEM;
8772 *data = mapper;
8773 }
8774
8775 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8776 }
8777
8778 static void
ftrace_snapshot_free(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * data)8779 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8780 unsigned long ip, void *data)
8781 {
8782 struct ftrace_func_mapper *mapper = data;
8783
8784 if (!ip) {
8785 if (!mapper)
8786 return;
8787 free_ftrace_func_mapper(mapper, NULL);
8788 return;
8789 }
8790
8791 ftrace_func_mapper_remove_ip(mapper, ip);
8792 }
8793
8794 static struct ftrace_probe_ops snapshot_probe_ops = {
8795 .func = ftrace_snapshot,
8796 .print = ftrace_snapshot_print,
8797 };
8798
8799 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8800 .func = ftrace_count_snapshot,
8801 .print = ftrace_snapshot_print,
8802 .init = ftrace_snapshot_init,
8803 .free = ftrace_snapshot_free,
8804 };
8805
8806 static int
ftrace_trace_snapshot_callback(struct trace_array * tr,struct ftrace_hash * hash,char * glob,char * cmd,char * param,int enable)8807 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8808 char *glob, char *cmd, char *param, int enable)
8809 {
8810 struct ftrace_probe_ops *ops;
8811 void *count = (void *)-1;
8812 char *number;
8813 int ret;
8814
8815 if (!tr)
8816 return -ENODEV;
8817
8818 /* hash funcs only work with set_ftrace_filter */
8819 if (!enable)
8820 return -EINVAL;
8821
8822 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8823
8824 if (glob[0] == '!')
8825 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8826
8827 if (!param)
8828 goto out_reg;
8829
8830 number = strsep(¶m, ":");
8831
8832 if (!strlen(number))
8833 goto out_reg;
8834
8835 /*
8836 * We use the callback data field (which is a pointer)
8837 * as our counter.
8838 */
8839 ret = kstrtoul(number, 0, (unsigned long *)&count);
8840 if (ret)
8841 return ret;
8842
8843 out_reg:
8844 ret = tracing_alloc_snapshot_instance(tr);
8845 if (ret < 0)
8846 goto out;
8847
8848 ret = register_ftrace_function_probe(glob, tr, ops, count);
8849
8850 out:
8851 return ret < 0 ? ret : 0;
8852 }
8853
8854 static struct ftrace_func_command ftrace_snapshot_cmd = {
8855 .name = "snapshot",
8856 .func = ftrace_trace_snapshot_callback,
8857 };
8858
register_snapshot_cmd(void)8859 static __init int register_snapshot_cmd(void)
8860 {
8861 return register_ftrace_command(&ftrace_snapshot_cmd);
8862 }
8863 #else
register_snapshot_cmd(void)8864 static inline __init int register_snapshot_cmd(void) { return 0; }
8865 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8866
tracing_get_dentry(struct trace_array * tr)8867 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8868 {
8869 if (WARN_ON(!tr->dir))
8870 return ERR_PTR(-ENODEV);
8871
8872 /* Top directory uses NULL as the parent */
8873 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8874 return NULL;
8875
8876 /* All sub buffers have a descriptor */
8877 return tr->dir;
8878 }
8879
tracing_dentry_percpu(struct trace_array * tr,int cpu)8880 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8881 {
8882 struct dentry *d_tracer;
8883
8884 if (tr->percpu_dir)
8885 return tr->percpu_dir;
8886
8887 d_tracer = tracing_get_dentry(tr);
8888 if (IS_ERR(d_tracer))
8889 return NULL;
8890
8891 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8892
8893 MEM_FAIL(!tr->percpu_dir,
8894 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8895
8896 return tr->percpu_dir;
8897 }
8898
8899 static struct dentry *
trace_create_cpu_file(const char * name,umode_t mode,struct dentry * parent,void * data,long cpu,const struct file_operations * fops)8900 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8901 void *data, long cpu, const struct file_operations *fops)
8902 {
8903 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8904
8905 if (ret) /* See tracing_get_cpu() */
8906 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8907 return ret;
8908 }
8909
8910 static void
tracing_init_tracefs_percpu(struct trace_array * tr,long cpu)8911 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8912 {
8913 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8914 struct dentry *d_cpu;
8915 char cpu_dir[30]; /* 30 characters should be more than enough */
8916
8917 if (!d_percpu)
8918 return;
8919
8920 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8921 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8922 if (!d_cpu) {
8923 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8924 return;
8925 }
8926
8927 /* per cpu trace_pipe */
8928 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8929 tr, cpu, &tracing_pipe_fops);
8930
8931 /* per cpu trace */
8932 trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8933 tr, cpu, &tracing_fops);
8934
8935 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8936 tr, cpu, &tracing_buffers_fops);
8937
8938 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8939 tr, cpu, &tracing_stats_fops);
8940
8941 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8942 tr, cpu, &tracing_entries_fops);
8943
8944 #ifdef CONFIG_TRACER_SNAPSHOT
8945 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8946 tr, cpu, &snapshot_fops);
8947
8948 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8949 tr, cpu, &snapshot_raw_fops);
8950 #endif
8951 }
8952
8953 #ifdef CONFIG_FTRACE_SELFTEST
8954 /* Let selftest have access to static functions in this file */
8955 #include "trace_selftest.c"
8956 #endif
8957
8958 static ssize_t
trace_options_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8959 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8960 loff_t *ppos)
8961 {
8962 struct trace_option_dentry *topt = filp->private_data;
8963 char *buf;
8964
8965 if (topt->flags->val & topt->opt->bit)
8966 buf = "1\n";
8967 else
8968 buf = "0\n";
8969
8970 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8971 }
8972
8973 static ssize_t
trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)8974 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8975 loff_t *ppos)
8976 {
8977 struct trace_option_dentry *topt = filp->private_data;
8978 unsigned long val;
8979 int ret;
8980
8981 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8982 if (ret)
8983 return ret;
8984
8985 if (val != 0 && val != 1)
8986 return -EINVAL;
8987
8988 if (!!(topt->flags->val & topt->opt->bit) != val) {
8989 mutex_lock(&trace_types_lock);
8990 ret = __set_tracer_option(topt->tr, topt->flags,
8991 topt->opt, !val);
8992 mutex_unlock(&trace_types_lock);
8993 if (ret)
8994 return ret;
8995 }
8996
8997 *ppos += cnt;
8998
8999 return cnt;
9000 }
9001
tracing_open_options(struct inode * inode,struct file * filp)9002 static int tracing_open_options(struct inode *inode, struct file *filp)
9003 {
9004 struct trace_option_dentry *topt = inode->i_private;
9005 int ret;
9006
9007 ret = tracing_check_open_get_tr(topt->tr);
9008 if (ret)
9009 return ret;
9010
9011 filp->private_data = inode->i_private;
9012 return 0;
9013 }
9014
tracing_release_options(struct inode * inode,struct file * file)9015 static int tracing_release_options(struct inode *inode, struct file *file)
9016 {
9017 struct trace_option_dentry *topt = file->private_data;
9018
9019 trace_array_put(topt->tr);
9020 return 0;
9021 }
9022
9023 static const struct file_operations trace_options_fops = {
9024 .open = tracing_open_options,
9025 .read = trace_options_read,
9026 .write = trace_options_write,
9027 .llseek = generic_file_llseek,
9028 .release = tracing_release_options,
9029 };
9030
9031 /*
9032 * In order to pass in both the trace_array descriptor as well as the index
9033 * to the flag that the trace option file represents, the trace_array
9034 * has a character array of trace_flags_index[], which holds the index
9035 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9036 * The address of this character array is passed to the flag option file
9037 * read/write callbacks.
9038 *
9039 * In order to extract both the index and the trace_array descriptor,
9040 * get_tr_index() uses the following algorithm.
9041 *
9042 * idx = *ptr;
9043 *
9044 * As the pointer itself contains the address of the index (remember
9045 * index[1] == 1).
9046 *
9047 * Then to get the trace_array descriptor, by subtracting that index
9048 * from the ptr, we get to the start of the index itself.
9049 *
9050 * ptr - idx == &index[0]
9051 *
9052 * Then a simple container_of() from that pointer gets us to the
9053 * trace_array descriptor.
9054 */
get_tr_index(void * data,struct trace_array ** ptr,unsigned int * pindex)9055 static void get_tr_index(void *data, struct trace_array **ptr,
9056 unsigned int *pindex)
9057 {
9058 *pindex = *(unsigned char *)data;
9059
9060 *ptr = container_of(data - *pindex, struct trace_array,
9061 trace_flags_index);
9062 }
9063
9064 static ssize_t
trace_options_core_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9065 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9066 loff_t *ppos)
9067 {
9068 void *tr_index = filp->private_data;
9069 struct trace_array *tr;
9070 unsigned int index;
9071 char *buf;
9072
9073 get_tr_index(tr_index, &tr, &index);
9074
9075 if (tr->trace_flags & (1 << index))
9076 buf = "1\n";
9077 else
9078 buf = "0\n";
9079
9080 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9081 }
9082
9083 static ssize_t
trace_options_core_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9084 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9085 loff_t *ppos)
9086 {
9087 void *tr_index = filp->private_data;
9088 struct trace_array *tr;
9089 unsigned int index;
9090 unsigned long val;
9091 int ret;
9092
9093 get_tr_index(tr_index, &tr, &index);
9094
9095 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9096 if (ret)
9097 return ret;
9098
9099 if (val != 0 && val != 1)
9100 return -EINVAL;
9101
9102 mutex_lock(&event_mutex);
9103 mutex_lock(&trace_types_lock);
9104 ret = set_tracer_flag(tr, 1 << index, val);
9105 mutex_unlock(&trace_types_lock);
9106 mutex_unlock(&event_mutex);
9107
9108 if (ret < 0)
9109 return ret;
9110
9111 *ppos += cnt;
9112
9113 return cnt;
9114 }
9115
9116 static const struct file_operations trace_options_core_fops = {
9117 .open = tracing_open_generic,
9118 .read = trace_options_core_read,
9119 .write = trace_options_core_write,
9120 .llseek = generic_file_llseek,
9121 };
9122
/*
 * Create a tracefs file and warn if that fails.
 *
 * Thin wrapper around tracefs_create_file(); returns whatever that function
 * returned, so NULL signals failure to the caller.
 */
struct dentry *trace_create_file(const char *name,
				 umode_t mode,
				 struct dentry *parent,
				 void *data,
				 const struct file_operations *fops)
{
	struct dentry *dentry = tracefs_create_file(name, mode, parent,
						    data, fops);

	if (!dentry)
		pr_warn("Could not create tracefs '%s' entry\n", name);

	return dentry;
}
9137
9138
trace_options_init_dentry(struct trace_array * tr)9139 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9140 {
9141 struct dentry *d_tracer;
9142
9143 if (tr->options)
9144 return tr->options;
9145
9146 d_tracer = tracing_get_dentry(tr);
9147 if (IS_ERR(d_tracer))
9148 return NULL;
9149
9150 tr->options = tracefs_create_dir("options", d_tracer);
9151 if (!tr->options) {
9152 pr_warn("Could not create tracefs directory 'options'\n");
9153 return NULL;
9154 }
9155
9156 return tr->options;
9157 }
9158
9159 static void
create_trace_option_file(struct trace_array * tr,struct trace_option_dentry * topt,struct tracer_flags * flags,struct tracer_opt * opt)9160 create_trace_option_file(struct trace_array *tr,
9161 struct trace_option_dentry *topt,
9162 struct tracer_flags *flags,
9163 struct tracer_opt *opt)
9164 {
9165 struct dentry *t_options;
9166
9167 t_options = trace_options_init_dentry(tr);
9168 if (!t_options)
9169 return;
9170
9171 topt->flags = flags;
9172 topt->opt = opt;
9173 topt->tr = tr;
9174
9175 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9176 t_options, topt, &trace_options_fops);
9177
9178 }
9179
9180 static void
create_trace_option_files(struct trace_array * tr,struct tracer * tracer)9181 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9182 {
9183 struct trace_option_dentry *topts;
9184 struct trace_options *tr_topts;
9185 struct tracer_flags *flags;
9186 struct tracer_opt *opts;
9187 int cnt;
9188 int i;
9189
9190 if (!tracer)
9191 return;
9192
9193 flags = tracer->flags;
9194
9195 if (!flags || !flags->opts)
9196 return;
9197
9198 /*
9199 * If this is an instance, only create flags for tracers
9200 * the instance may have.
9201 */
9202 if (!trace_ok_for_array(tracer, tr))
9203 return;
9204
9205 for (i = 0; i < tr->nr_topts; i++) {
9206 /* Make sure there's no duplicate flags. */
9207 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9208 return;
9209 }
9210
9211 opts = flags->opts;
9212
9213 for (cnt = 0; opts[cnt].name; cnt++)
9214 ;
9215
9216 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9217 if (!topts)
9218 return;
9219
9220 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9221 GFP_KERNEL);
9222 if (!tr_topts) {
9223 kfree(topts);
9224 return;
9225 }
9226
9227 tr->topts = tr_topts;
9228 tr->topts[tr->nr_topts].tracer = tracer;
9229 tr->topts[tr->nr_topts].topts = topts;
9230 tr->nr_topts++;
9231
9232 for (cnt = 0; opts[cnt].name; cnt++) {
9233 create_trace_option_file(tr, &topts[cnt], flags,
9234 &opts[cnt]);
9235 MEM_FAIL(topts[cnt].entry == NULL,
9236 "Failed to create trace option: %s",
9237 opts[cnt].name);
9238 }
9239 }
9240
9241 static struct dentry *
create_trace_option_core_file(struct trace_array * tr,const char * option,long index)9242 create_trace_option_core_file(struct trace_array *tr,
9243 const char *option, long index)
9244 {
9245 struct dentry *t_options;
9246
9247 t_options = trace_options_init_dentry(tr);
9248 if (!t_options)
9249 return NULL;
9250
9251 return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9252 (void *)&tr->trace_flags_index[index],
9253 &trace_options_core_fops);
9254 }
9255
create_trace_options_dir(struct trace_array * tr)9256 static void create_trace_options_dir(struct trace_array *tr)
9257 {
9258 struct dentry *t_options;
9259 bool top_level = tr == &global_trace;
9260 int i;
9261
9262 t_options = trace_options_init_dentry(tr);
9263 if (!t_options)
9264 return;
9265
9266 for (i = 0; trace_options[i]; i++) {
9267 if (top_level ||
9268 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9269 create_trace_option_core_file(tr, trace_options[i], i);
9270 }
9271 }
9272
9273 static ssize_t
rb_simple_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9274 rb_simple_read(struct file *filp, char __user *ubuf,
9275 size_t cnt, loff_t *ppos)
9276 {
9277 struct trace_array *tr = filp->private_data;
9278 char buf[64];
9279 int r;
9280
9281 r = tracer_tracing_is_on(tr);
9282 r = sprintf(buf, "%d\n", r);
9283
9284 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9285 }
9286
9287 static ssize_t
rb_simple_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9288 rb_simple_write(struct file *filp, const char __user *ubuf,
9289 size_t cnt, loff_t *ppos)
9290 {
9291 struct trace_array *tr = filp->private_data;
9292 struct trace_buffer *buffer = tr->array_buffer.buffer;
9293 unsigned long val;
9294 int ret;
9295
9296 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9297 if (ret)
9298 return ret;
9299
9300 if (buffer) {
9301 mutex_lock(&trace_types_lock);
9302 if (!!val == tracer_tracing_is_on(tr)) {
9303 val = 0; /* do nothing */
9304 } else if (val) {
9305 tracer_tracing_on(tr);
9306 if (tr->current_trace->start)
9307 tr->current_trace->start(tr);
9308 } else {
9309 tracer_tracing_off(tr);
9310 if (tr->current_trace->stop)
9311 tr->current_trace->stop(tr);
9312 /* Wake up any waiters */
9313 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9314 }
9315 mutex_unlock(&trace_types_lock);
9316 }
9317
9318 (*ppos)++;
9319
9320 return cnt;
9321 }
9322
9323 static const struct file_operations rb_simple_fops = {
9324 .open = tracing_open_generic_tr,
9325 .read = rb_simple_read,
9326 .write = rb_simple_write,
9327 .release = tracing_release_generic_tr,
9328 .llseek = default_llseek,
9329 };
9330
9331 static ssize_t
buffer_percent_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9332 buffer_percent_read(struct file *filp, char __user *ubuf,
9333 size_t cnt, loff_t *ppos)
9334 {
9335 struct trace_array *tr = filp->private_data;
9336 char buf[64];
9337 int r;
9338
9339 r = tr->buffer_percent;
9340 r = sprintf(buf, "%d\n", r);
9341
9342 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9343 }
9344
9345 static ssize_t
buffer_percent_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9346 buffer_percent_write(struct file *filp, const char __user *ubuf,
9347 size_t cnt, loff_t *ppos)
9348 {
9349 struct trace_array *tr = filp->private_data;
9350 unsigned long val;
9351 int ret;
9352
9353 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9354 if (ret)
9355 return ret;
9356
9357 if (val > 100)
9358 return -EINVAL;
9359
9360 tr->buffer_percent = val;
9361
9362 (*ppos)++;
9363
9364 return cnt;
9365 }
9366
9367 static const struct file_operations buffer_percent_fops = {
9368 .open = tracing_open_generic_tr,
9369 .read = buffer_percent_read,
9370 .write = buffer_percent_write,
9371 .release = tracing_release_generic_tr,
9372 .llseek = default_llseek,
9373 };
9374
/*
 * The "instances" directory; assigned by create_trace_instances().
 * While it is still NULL, trace_array_create() does not create a
 * directory for a new instance.
 */
static struct dentry *trace_instance_dir;

/* Forward declaration: used by trace_array_create_dir() before its definition */
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9379
9380 static int
allocate_trace_buffer(struct trace_array * tr,struct array_buffer * buf,int size)9381 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9382 {
9383 enum ring_buffer_flags rb_flags;
9384
9385 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9386
9387 buf->tr = tr;
9388
9389 buf->buffer = ring_buffer_alloc(size, rb_flags);
9390 if (!buf->buffer)
9391 return -ENOMEM;
9392
9393 buf->data = alloc_percpu(struct trace_array_cpu);
9394 if (!buf->data) {
9395 ring_buffer_free(buf->buffer);
9396 buf->buffer = NULL;
9397 return -ENOMEM;
9398 }
9399
9400 /* Allocate the first page for all buffers */
9401 set_buffer_entries(&tr->array_buffer,
9402 ring_buffer_size(tr->array_buffer.buffer, 0));
9403
9404 return 0;
9405 }
9406
free_trace_buffer(struct array_buffer * buf)9407 static void free_trace_buffer(struct array_buffer *buf)
9408 {
9409 if (buf->buffer) {
9410 ring_buffer_free(buf->buffer);
9411 buf->buffer = NULL;
9412 free_percpu(buf->data);
9413 buf->data = NULL;
9414 }
9415 }
9416
allocate_trace_buffers(struct trace_array * tr,int size)9417 static int allocate_trace_buffers(struct trace_array *tr, int size)
9418 {
9419 int ret;
9420
9421 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9422 if (ret)
9423 return ret;
9424
9425 #ifdef CONFIG_TRACER_MAX_TRACE
9426 ret = allocate_trace_buffer(tr, &tr->max_buffer,
9427 allocate_snapshot ? size : 1);
9428 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9429 free_trace_buffer(&tr->array_buffer);
9430 return -ENOMEM;
9431 }
9432 tr->allocated_snapshot = allocate_snapshot;
9433
9434 allocate_snapshot = false;
9435 #endif
9436
9437 return 0;
9438 }
9439
free_trace_buffers(struct trace_array * tr)9440 static void free_trace_buffers(struct trace_array *tr)
9441 {
9442 if (!tr)
9443 return;
9444
9445 free_trace_buffer(&tr->array_buffer);
9446
9447 #ifdef CONFIG_TRACER_MAX_TRACE
9448 free_trace_buffer(&tr->max_buffer);
9449 #endif
9450 }
9451
init_trace_flags_index(struct trace_array * tr)9452 static void init_trace_flags_index(struct trace_array *tr)
9453 {
9454 int i;
9455
9456 /* Used by the trace options files */
9457 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9458 tr->trace_flags_index[i] = i;
9459 }
9460
__update_tracer_options(struct trace_array * tr)9461 static void __update_tracer_options(struct trace_array *tr)
9462 {
9463 struct tracer *t;
9464
9465 for (t = trace_types; t; t = t->next)
9466 add_tracer_options(tr, t);
9467 }
9468
update_tracer_options(struct trace_array * tr)9469 static void update_tracer_options(struct trace_array *tr)
9470 {
9471 mutex_lock(&trace_types_lock);
9472 tracer_options_updated = true;
9473 __update_tracer_options(tr);
9474 mutex_unlock(&trace_types_lock);
9475 }
9476
9477 /* Must have trace_types_lock held */
trace_array_find(const char * instance)9478 struct trace_array *trace_array_find(const char *instance)
9479 {
9480 struct trace_array *tr, *found = NULL;
9481
9482 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9483 if (tr->name && strcmp(tr->name, instance) == 0) {
9484 found = tr;
9485 break;
9486 }
9487 }
9488
9489 return found;
9490 }
9491
trace_array_find_get(const char * instance)9492 struct trace_array *trace_array_find_get(const char *instance)
9493 {
9494 struct trace_array *tr;
9495
9496 mutex_lock(&trace_types_lock);
9497 tr = trace_array_find(instance);
9498 if (tr)
9499 tr->ref++;
9500 mutex_unlock(&trace_types_lock);
9501
9502 return tr;
9503 }
9504
trace_array_create_dir(struct trace_array * tr)9505 static int trace_array_create_dir(struct trace_array *tr)
9506 {
9507 int ret;
9508
9509 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9510 if (!tr->dir)
9511 return -EINVAL;
9512
9513 ret = event_trace_add_tracer(tr->dir, tr);
9514 if (ret) {
9515 tracefs_remove(tr->dir);
9516 return ret;
9517 }
9518
9519 init_tracer_tracefs(tr, tr->dir);
9520 __update_tracer_options(tr);
9521
9522 return ret;
9523 }
9524
trace_array_create(const char * name)9525 static struct trace_array *trace_array_create(const char *name)
9526 {
9527 struct trace_array *tr;
9528 int ret;
9529
9530 ret = -ENOMEM;
9531 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9532 if (!tr)
9533 return ERR_PTR(ret);
9534
9535 tr->name = kstrdup(name, GFP_KERNEL);
9536 if (!tr->name)
9537 goto out_free_tr;
9538
9539 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9540 goto out_free_tr;
9541
9542 if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9543 goto out_free_tr;
9544
9545 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9546
9547 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9548
9549 raw_spin_lock_init(&tr->start_lock);
9550
9551 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9552
9553 tr->current_trace = &nop_trace;
9554
9555 INIT_LIST_HEAD(&tr->systems);
9556 INIT_LIST_HEAD(&tr->events);
9557 INIT_LIST_HEAD(&tr->hist_vars);
9558 INIT_LIST_HEAD(&tr->err_log);
9559
9560 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9561 goto out_free_tr;
9562
9563 if (ftrace_allocate_ftrace_ops(tr) < 0)
9564 goto out_free_tr;
9565
9566 ftrace_init_trace_array(tr);
9567
9568 init_trace_flags_index(tr);
9569
9570 if (trace_instance_dir) {
9571 ret = trace_array_create_dir(tr);
9572 if (ret)
9573 goto out_free_tr;
9574 } else
9575 __trace_early_add_events(tr);
9576
9577 list_add(&tr->list, &ftrace_trace_arrays);
9578
9579 tr->ref++;
9580
9581 return tr;
9582
9583 out_free_tr:
9584 ftrace_free_ftrace_ops(tr);
9585 free_trace_buffers(tr);
9586 free_cpumask_var(tr->pipe_cpumask);
9587 free_cpumask_var(tr->tracing_cpumask);
9588 kfree(tr->name);
9589 kfree(tr);
9590
9591 return ERR_PTR(ret);
9592 }
9593
instance_mkdir(const char * name)9594 static int instance_mkdir(const char *name)
9595 {
9596 struct trace_array *tr;
9597 int ret;
9598
9599 mutex_lock(&event_mutex);
9600 mutex_lock(&trace_types_lock);
9601
9602 ret = -EEXIST;
9603 if (trace_array_find(name))
9604 goto out_unlock;
9605
9606 tr = trace_array_create(name);
9607
9608 ret = PTR_ERR_OR_ZERO(tr);
9609
9610 out_unlock:
9611 mutex_unlock(&trace_types_lock);
9612 mutex_unlock(&event_mutex);
9613 return ret;
9614 }
9615
9616 /**
9617 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9618 * @name: The name of the trace array to be looked up/created.
9619 *
9620 * Returns pointer to trace array with given name.
9621 * NULL, if it cannot be created.
9622 *
9623 * NOTE: This function increments the reference counter associated with the
9624 * trace array returned. This makes sure it cannot be freed while in use.
9625 * Use trace_array_put() once the trace array is no longer needed.
9626 * If the trace_array is to be freed, trace_array_destroy() needs to
9627 * be called after the trace_array_put(), or simply let user space delete
9628 * it from the tracefs instances directory. But until the
9629 * trace_array_put() is called, user space can not delete it.
9630 *
9631 */
trace_array_get_by_name(const char * name)9632 struct trace_array *trace_array_get_by_name(const char *name)
9633 {
9634 struct trace_array *tr;
9635
9636 mutex_lock(&event_mutex);
9637 mutex_lock(&trace_types_lock);
9638
9639 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9640 if (tr->name && strcmp(tr->name, name) == 0)
9641 goto out_unlock;
9642 }
9643
9644 tr = trace_array_create(name);
9645
9646 if (IS_ERR(tr))
9647 tr = NULL;
9648 out_unlock:
9649 if (tr)
9650 tr->ref++;
9651
9652 mutex_unlock(&trace_types_lock);
9653 mutex_unlock(&event_mutex);
9654 return tr;
9655 }
9656 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9657
__remove_instance(struct trace_array * tr)9658 static int __remove_instance(struct trace_array *tr)
9659 {
9660 int i;
9661
9662 /* Reference counter for a newly created trace array = 1. */
9663 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9664 return -EBUSY;
9665
9666 list_del(&tr->list);
9667
9668 /* Disable all the flags that were enabled coming in */
9669 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9670 if ((1 << i) & ZEROED_TRACE_FLAGS)
9671 set_tracer_flag(tr, 1 << i, 0);
9672 }
9673
9674 tracing_set_nop(tr);
9675 clear_ftrace_function_probes(tr);
9676 event_trace_del_tracer(tr);
9677 ftrace_clear_pids(tr);
9678 ftrace_destroy_function_files(tr);
9679 tracefs_remove(tr->dir);
9680 free_percpu(tr->last_func_repeats);
9681 free_trace_buffers(tr);
9682 clear_tracing_err_log(tr);
9683
9684 for (i = 0; i < tr->nr_topts; i++) {
9685 kfree(tr->topts[i].topts);
9686 }
9687 kfree(tr->topts);
9688
9689 free_cpumask_var(tr->pipe_cpumask);
9690 free_cpumask_var(tr->tracing_cpumask);
9691 kfree(tr->name);
9692 kfree(tr);
9693
9694 return 0;
9695 }
9696
trace_array_destroy(struct trace_array * this_tr)9697 int trace_array_destroy(struct trace_array *this_tr)
9698 {
9699 struct trace_array *tr;
9700 int ret;
9701
9702 if (!this_tr)
9703 return -EINVAL;
9704
9705 mutex_lock(&event_mutex);
9706 mutex_lock(&trace_types_lock);
9707
9708 ret = -ENODEV;
9709
9710 /* Making sure trace array exists before destroying it. */
9711 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9712 if (tr == this_tr) {
9713 ret = __remove_instance(tr);
9714 break;
9715 }
9716 }
9717
9718 mutex_unlock(&trace_types_lock);
9719 mutex_unlock(&event_mutex);
9720
9721 return ret;
9722 }
9723 EXPORT_SYMBOL_GPL(trace_array_destroy);
9724
instance_rmdir(const char * name)9725 static int instance_rmdir(const char *name)
9726 {
9727 struct trace_array *tr;
9728 int ret;
9729
9730 mutex_lock(&event_mutex);
9731 mutex_lock(&trace_types_lock);
9732
9733 ret = -ENODEV;
9734 tr = trace_array_find(name);
9735 if (tr)
9736 ret = __remove_instance(tr);
9737
9738 mutex_unlock(&trace_types_lock);
9739 mutex_unlock(&event_mutex);
9740
9741 return ret;
9742 }
9743
create_trace_instances(struct dentry * d_tracer)9744 static __init void create_trace_instances(struct dentry *d_tracer)
9745 {
9746 struct trace_array *tr;
9747
9748 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9749 instance_mkdir,
9750 instance_rmdir);
9751 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9752 return;
9753
9754 mutex_lock(&event_mutex);
9755 mutex_lock(&trace_types_lock);
9756
9757 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9758 if (!tr->name)
9759 continue;
9760 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9761 "Failed to create instance directory\n"))
9762 break;
9763 }
9764
9765 mutex_unlock(&trace_types_lock);
9766 mutex_unlock(&event_mutex);
9767 }
9768
9769 static void
init_tracer_tracefs(struct trace_array * tr,struct dentry * d_tracer)9770 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9771 {
9772 int cpu;
9773
9774 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9775 tr, &show_traces_fops);
9776
9777 trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9778 tr, &set_tracer_fops);
9779
9780 trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9781 tr, &tracing_cpumask_fops);
9782
9783 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9784 tr, &tracing_iter_fops);
9785
9786 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9787 tr, &tracing_fops);
9788
9789 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9790 tr, &tracing_pipe_fops);
9791
9792 trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9793 tr, &tracing_entries_fops);
9794
9795 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9796 tr, &tracing_total_entries_fops);
9797
9798 trace_create_file("free_buffer", 0200, d_tracer,
9799 tr, &tracing_free_buffer_fops);
9800
9801 trace_create_file("trace_marker", 0220, d_tracer,
9802 tr, &tracing_mark_fops);
9803
9804 tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9805
9806 trace_create_file("trace_marker_raw", 0220, d_tracer,
9807 tr, &tracing_mark_raw_fops);
9808
9809 trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9810 &trace_clock_fops);
9811
9812 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9813 tr, &rb_simple_fops);
9814
9815 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9816 &trace_time_stamp_mode_fops);
9817
9818 tr->buffer_percent = 50;
9819
9820 trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9821 tr, &buffer_percent_fops);
9822
9823 create_trace_options_dir(tr);
9824
9825 #ifdef CONFIG_TRACER_MAX_TRACE
9826 trace_create_maxlat_file(tr, d_tracer);
9827 #endif
9828
9829 if (ftrace_create_function_files(tr, d_tracer))
9830 MEM_FAIL(1, "Could not allocate function filter files");
9831
9832 #ifdef CONFIG_TRACER_SNAPSHOT
9833 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9834 tr, &snapshot_fops);
9835 #endif
9836
9837 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9838 tr, &tracing_err_log_fops);
9839
9840 for_each_tracing_cpu(cpu)
9841 tracing_init_tracefs_percpu(tr, cpu);
9842
9843 ftrace_init_tracefs(tr, d_tracer);
9844 }
9845
trace_automount(struct dentry * mntpt,void * ingore)9846 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9847 {
9848 struct vfsmount *mnt;
9849 struct file_system_type *type;
9850
9851 /*
9852 * To maintain backward compatibility for tools that mount
9853 * debugfs to get to the tracing facility, tracefs is automatically
9854 * mounted to the debugfs/tracing directory.
9855 */
9856 type = get_fs_type("tracefs");
9857 if (!type)
9858 return NULL;
9859 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9860 put_filesystem(type);
9861 if (IS_ERR(mnt))
9862 return NULL;
9863 mntget(mnt);
9864
9865 return mnt;
9866 }
9867
9868 /**
9869 * tracing_init_dentry - initialize top level trace array
9870 *
9871 * This is called when creating files or directories in the tracing
9872 * directory. It is called via fs_initcall() by any of the boot up code
9873 * and expects to return the dentry of the top level tracing directory.
9874 */
tracing_init_dentry(void)9875 int tracing_init_dentry(void)
9876 {
9877 struct trace_array *tr = &global_trace;
9878
9879 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9880 pr_warn("Tracing disabled due to lockdown\n");
9881 return -EPERM;
9882 }
9883
9884 /* The top level trace array uses NULL as parent */
9885 if (tr->dir)
9886 return 0;
9887
9888 if (WARN_ON(!tracefs_initialized()))
9889 return -ENODEV;
9890
9891 /*
9892 * As there may still be users that expect the tracing
9893 * files to exist in debugfs/tracing, we must automount
9894 * the tracefs file system there, so older tools still
9895 * work with the newer kernel.
9896 */
9897 tr->dir = debugfs_create_automount("tracing", NULL,
9898 trace_automount, NULL);
9899
9900 return 0;
9901 }
9902
/*
 * Start and end of the built-in trace_eval_map pointer array; the
 * difference of the two is the entry count used by eval_map_work_func().
 * NOTE(review): presumably provided by the linker script - confirm.
 */
extern struct trace_eval_map *__start_ftrace_eval_maps[];
extern struct trace_eval_map *__stop_ftrace_eval_maps[];

/* Boot-only workqueue and the work items queued on it during init */
static struct workqueue_struct *eval_map_wq __initdata;
static struct work_struct eval_map_work __initdata;
static struct work_struct tracerfs_init_work __initdata;
9909
eval_map_work_func(struct work_struct * work)9910 static void __init eval_map_work_func(struct work_struct *work)
9911 {
9912 int len;
9913
9914 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9915 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9916 }
9917
trace_eval_init(void)9918 static int __init trace_eval_init(void)
9919 {
9920 INIT_WORK(&eval_map_work, eval_map_work_func);
9921
9922 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9923 if (!eval_map_wq) {
9924 pr_err("Unable to allocate eval_map_wq\n");
9925 /* Do work here */
9926 eval_map_work_func(&eval_map_work);
9927 return -ENOMEM;
9928 }
9929
9930 queue_work(eval_map_wq, &eval_map_work);
9931 return 0;
9932 }
9933
9934 subsys_initcall(trace_eval_init);
9935
trace_eval_sync(void)9936 static int __init trace_eval_sync(void)
9937 {
9938 /* Make sure the eval map updates are finished */
9939 if (eval_map_wq)
9940 destroy_workqueue(eval_map_wq);
9941 return 0;
9942 }
9943
9944 late_initcall_sync(trace_eval_sync);
9945
9946
9947 #ifdef CONFIG_MODULES
trace_module_add_evals(struct module * mod)9948 static void trace_module_add_evals(struct module *mod)
9949 {
9950 if (!mod->num_trace_evals)
9951 return;
9952
9953 /*
9954 * Modules with bad taint do not have events created, do
9955 * not bother with enums either.
9956 */
9957 if (trace_module_has_bad_taint(mod))
9958 return;
9959
9960 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9961 }
9962
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/*
 * Unlink and free the eval map block of module @mod from the
 * trace_eval_maps chain, under trace_eval_mutex.
 *
 * The chain is walked block by block: a block whose head belongs to
 * @mod is the one to drop, otherwise the walk jumps to the block's
 * tail and follows its next pointer.
 */
static void trace_module_remove_evals(struct module *mod)
{
	union trace_eval_map_item **link = &trace_eval_maps;
	union trace_eval_map_item *item;

	if (!mod->num_trace_evals)
		return;

	mutex_lock(&trace_eval_mutex);

	for (item = trace_eval_maps; item; item = item->tail.next) {
		if (item->head.mod == mod)
			break;
		item = trace_eval_jmp_to_tail(item);
		/* Remember the pointer that leads to the next block */
		link = &item->tail.next;
	}

	if (item) {
		/* Bypass the block of @mod, then release it */
		*link = trace_eval_jmp_to_tail(item)->tail.next;
		kfree(item);
	}

	mutex_unlock(&trace_eval_mutex);
}
#else
/* Without CONFIG_TRACE_EVAL_MAP_FILE there is nothing to remove */
static inline void trace_module_remove_evals(struct module *mod) { }
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9994
trace_module_notify(struct notifier_block * self,unsigned long val,void * data)9995 static int trace_module_notify(struct notifier_block *self,
9996 unsigned long val, void *data)
9997 {
9998 struct module *mod = data;
9999
10000 switch (val) {
10001 case MODULE_STATE_COMING:
10002 trace_module_add_evals(mod);
10003 break;
10004 case MODULE_STATE_GOING:
10005 trace_module_remove_evals(mod);
10006 break;
10007 }
10008
10009 return NOTIFY_OK;
10010 }
10011
10012 static struct notifier_block trace_module_nb = {
10013 .notifier_call = trace_module_notify,
10014 .priority = 0,
10015 };
10016 #endif /* CONFIG_MODULES */
10017
tracer_init_tracefs_work_func(struct work_struct * work)10018 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10019 {
10020
10021 event_trace_init();
10022
10023 init_tracer_tracefs(&global_trace, NULL);
10024 ftrace_init_tracefs_toplevel(&global_trace, NULL);
10025
10026 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10027 &global_trace, &tracing_thresh_fops);
10028
10029 trace_create_file("README", TRACE_MODE_READ, NULL,
10030 NULL, &tracing_readme_fops);
10031
10032 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10033 NULL, &tracing_saved_cmdlines_fops);
10034
10035 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10036 NULL, &tracing_saved_cmdlines_size_fops);
10037
10038 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10039 NULL, &tracing_saved_tgids_fops);
10040
10041 trace_create_eval_file(NULL);
10042
10043 #ifdef CONFIG_MODULES
10044 register_module_notifier(&trace_module_nb);
10045 #endif
10046
10047 #ifdef CONFIG_DYNAMIC_FTRACE
10048 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10049 NULL, &tracing_dyn_info_fops);
10050 #endif
10051
10052 create_trace_instances(NULL);
10053
10054 update_tracer_options(&global_trace);
10055 }
10056
tracer_init_tracefs(void)10057 static __init int tracer_init_tracefs(void)
10058 {
10059 int ret;
10060
10061 trace_access_lock_init();
10062
10063 ret = tracing_init_dentry();
10064 if (ret)
10065 return 0;
10066
10067 if (eval_map_wq) {
10068 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10069 queue_work(eval_map_wq, &tracerfs_init_work);
10070 } else {
10071 tracer_init_tracefs_work_func(NULL);
10072 }
10073
10074 rv_init_interface();
10075
10076 return 0;
10077 }
10078
10079 fs_initcall(tracer_init_tracefs);
10080
/* Forward declaration: the notifier blocks below refer to the handler */
static int trace_die_panic_handler(struct notifier_block *self,
		unsigned long ev, void *unused);

/* Notifier block for panic events; shares its handler with the die notifier */
static struct notifier_block trace_panic_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};

/* Notifier block for die events; the handler only acts on DIE_OOPS for it */
static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};
10093
10094 /*
10095 * The idea is to execute the following die/panic callback early, in order
10096 * to avoid showing irrelevant information in the trace (like other panic
10097 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10098 * warnings get disabled (to prevent potential log flooding).
10099 */
trace_die_panic_handler(struct notifier_block * self,unsigned long ev,void * unused)10100 static int trace_die_panic_handler(struct notifier_block *self,
10101 unsigned long ev, void *unused)
10102 {
10103 bool ftrace_check = false;
10104
10105 trace_android_vh_ftrace_oops_enter(&ftrace_check);
10106
10107 if (!ftrace_dump_on_oops || ftrace_check)
10108 return NOTIFY_DONE;
10109
10110 /* The die notifier requires DIE_OOPS to trigger */
10111 if (self == &trace_die_notifier && ev != DIE_OOPS)
10112 return NOTIFY_DONE;
10113
10114 ftrace_dump(ftrace_dump_on_oops);
10115
10116 trace_android_vh_ftrace_oops_exit(&ftrace_check);
10117 return NOTIFY_DONE;
10118 }
10119
/*
 * Upper bound on the number of characters trace_printk_seq() prints in
 * one go. printk is set to a max of 1024; we really don't need it that
 * big, and nothing should be printing 1000 characters anyway.
 */
#define TRACE_MAX_PRINT		1000

/*
 * Log level used for the ftrace dump output. KERN_TRACE is defined
 * here so that there is a single place to modify if the dump should
 * be printed at a different level.
 */
#define KERN_TRACE		KERN_EMERG
10132
10133 void
trace_printk_seq(struct trace_seq * s)10134 trace_printk_seq(struct trace_seq *s)
10135 {
10136 bool dump_printk = true;
10137
10138 /* Probably should print a warning here. */
10139 if (s->seq.len >= TRACE_MAX_PRINT)
10140 s->seq.len = TRACE_MAX_PRINT;
10141
10142 /*
10143 * More paranoid code. Although the buffer size is set to
10144 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10145 * an extra layer of protection.
10146 */
10147 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10148 s->seq.len = s->seq.size - 1;
10149
10150 /* should be zero ended, but we are paranoid. */
10151 s->buffer[s->seq.len] = 0;
10152
10153 trace_android_vh_ftrace_dump_buffer(s, &dump_printk);
10154 if (dump_printk)
10155 printk(KERN_TRACE "%s", s->buffer);
10156
10157 trace_seq_init(s);
10158 }
10159
trace_init_global_iter(struct trace_iterator * iter)10160 void trace_init_global_iter(struct trace_iterator *iter)
10161 {
10162 iter->tr = &global_trace;
10163 iter->trace = iter->tr->current_trace;
10164 iter->cpu_file = RING_BUFFER_ALL_CPUS;
10165 iter->array_buffer = &global_trace.array_buffer;
10166
10167 if (iter->trace && iter->trace->open)
10168 iter->trace->open(iter);
10169
10170 /* Annotate start of buffers if we had overruns */
10171 if (ring_buffer_overruns(iter->array_buffer->buffer))
10172 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10173
10174 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10175 if (trace_clocks[iter->tr->clock_id].in_ns)
10176 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10177
10178 /* Can not use kmalloc for iter.temp and iter.fmt */
10179 iter->temp = static_temp_buf;
10180 iter->temp_size = STATIC_TEMP_BUF_SIZE;
10181 iter->fmt = static_fmt_buf;
10182 iter->fmt_size = STATIC_FMT_BUF_SIZE;
10183 }
10184
/*
 * ftrace_dump - print the content of the global trace buffer via printk
 * @oops_dump_mode: DUMP_ALL dumps the buffers of all CPUs, DUMP_ORIG
 *	only the buffer of the CPU this runs on, DUMP_NONE prints
 *	nothing. Any other value is reported and handled like DUMP_ALL.
 *
 * Tracing is turned off and stays off afterwards. Only one dump runs at
 * a time; a call made while another dump is in progress returns
 * immediately. The dump itself runs with local interrupts disabled and
 * with the per-CPU "disabled" counters of the buffer raised.
 *
 * Several Android vendor hooks can influence the dump: the size check
 * hook can cause the dump to be skipped, and the format check hook
 * selects whether the latency format is used.
 */
void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	static atomic_t dump_running;
	struct trace_array *tr = &global_trace;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0, cpu;
	bool ftrace_check = true;
	bool ftrace_size_check = false;
	unsigned long size;

	/* Only allow one dump user at a time. */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracing_off();

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_global_iter(&iter);

	/*
	 * Raise the per-CPU disabled counters (dropped again at
	 * out_enable) and hand every per-CPU buffer size to the vendor
	 * size check hook.
	 */
	for_each_tracing_cpu(cpu) {
		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
		size = ring_buffer_size(iter.array_buffer->buffer, cpu);
		trace_android_vh_ftrace_size_check(size, &ftrace_size_check);
	}

	/* Remember the flag so that it can be restored at out_enable */
	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;

	/* The vendor hook asked for the dump to be skipped */
	if (ftrace_size_check)
		goto out_enable;

	switch (oops_dump_mode) {
	case DUMP_ALL:
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
		break;
	case DUMP_ORIG:
		iter.cpu_file = raw_smp_processor_id();
		break;
	case DUMP_NONE:
		goto out_enable;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
	}

	printk(KERN_TRACE "Dumping ftrace buffer:\n");

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * The ftrace timestamp supports two formats:
	 * - ftrace_check = 1, latency format, starting with 0 from a specific time.
	 * - ftrace_check = 0, absolute time format, consistent with kernel time.
	 * This vendor hook lets the format be chosen to fit different requirements.
	 */
	trace_android_vh_ftrace_format_check(&ftrace_check);

	/*
	 * We need to stop all tracing on all CPUS to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill all what we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {

		/* Separator line before the first entry only */
		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		if (ftrace_check)
			iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);
		}
		/* The dump can take a long time; keep the NMI watchdog quiet */
		touch_nmi_watchdog();

		trace_printk_seq(&iter.seq);
	}

	if (!cnt)
		printk(KERN_TRACE "   (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

 out_enable:
	/* Undo the temporary changes made above */
	tr->trace_flags |= old_userobj;

	for_each_tracing_cpu(cpu) {
		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}
	atomic_dec(&dump_running);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ftrace_dump);
10307
/* Size of the kernel bounce buffer used by trace_parse_run_command() */
#define WRITE_BUFSIZE  4096

/*
 * trace_parse_run_command - feed user supplied text to @createfn line by line
 * @file: not used here
 * @buffer: user space buffer holding the commands
 * @count: number of bytes in @buffer
 * @ppos: not used here
 * @createfn: callback invoked once per line
 *
 * The input is copied in chunks of at most WRITE_BUFSIZE - 1 bytes and
 * split at newlines. Everything from a '#' to the end of a line is cut
 * off before the line is handed to @createfn.
 *
 * Returns the number of bytes consumed on success. Otherwise returns
 * -ENOMEM, -EFAULT, -EINVAL for a line that does not fit in the
 * buffer, or the first non-zero value returned by @createfn.
 */
ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(const char *))
{
	char *kbuf, *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	/* Outer loop: one iteration per chunk copied from user space */
	while (done < count) {
		size = count - done;

		/* Leave room for the terminating NUL */
		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size)) {
			ret = -EFAULT;
			goto out;
		}
		kbuf[size] = '\0';
		buf = kbuf;
		/* Inner loop: one iteration per line found in the chunk */
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				/* Consumed length includes the newline */
				size = tmp - buf + 1;
			} else {
				size = strlen(buf);
				if (done + size < count) {
					/*
					 * The line continues past this chunk.
					 * If earlier lines were consumed from
					 * it, refill the buffer starting at
					 * this line.
					 */
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					ret = -EINVAL;
					goto out;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');

			if (tmp)
				*tmp = '\0';

			ret = createfn(buf);
			if (ret)
				goto out;
			buf += size;

		} while (done < count);
	}
	ret = done;

out:
	kfree(kbuf);

	return ret;
}
10374
10375 #ifdef CONFIG_TRACER_MAX_TRACE
tr_needs_alloc_snapshot(const char * name)10376 __init static bool tr_needs_alloc_snapshot(const char *name)
10377 {
10378 char *test;
10379 int len = strlen(name);
10380 bool ret;
10381
10382 if (!boot_snapshot_index)
10383 return false;
10384
10385 if (strncmp(name, boot_snapshot_info, len) == 0 &&
10386 boot_snapshot_info[len] == '\t')
10387 return true;
10388
10389 test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10390 if (!test)
10391 return false;
10392
10393 sprintf(test, "\t%s\t", name);
10394 ret = strstr(boot_snapshot_info, test) == NULL;
10395 kfree(test);
10396 return ret;
10397 }
10398
do_allocate_snapshot(const char * name)10399 __init static void do_allocate_snapshot(const char *name)
10400 {
10401 if (!tr_needs_alloc_snapshot(name))
10402 return;
10403
10404 /*
10405 * When allocate_snapshot is set, the next call to
10406 * allocate_trace_buffers() (called by trace_array_get_by_name())
10407 * will allocate the snapshot buffer. That will alse clear
10408 * this flag.
10409 */
10410 allocate_snapshot = true;
10411 }
10412 #else
/* Without CONFIG_TRACER_MAX_TRACE this is a no-op */
static inline void do_allocate_snapshot(const char *name) { }
10414 #endif
10415
enable_instances(void)10416 __init static void enable_instances(void)
10417 {
10418 struct trace_array *tr;
10419 char *curr_str;
10420 char *str;
10421 char *tok;
10422
10423 /* A tab is always appended */
10424 boot_instance_info[boot_instance_index - 1] = '\0';
10425 str = boot_instance_info;
10426
10427 while ((curr_str = strsep(&str, "\t"))) {
10428
10429 tok = strsep(&curr_str, ",");
10430
10431 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10432 do_allocate_snapshot(tok);
10433
10434 tr = trace_array_get_by_name(tok);
10435 if (!tr) {
10436 pr_warn("Failed to create instance buffer %s\n", curr_str);
10437 continue;
10438 }
10439 /* Allow user space to delete it */
10440 trace_array_put(tr);
10441
10442 while ((tok = strsep(&curr_str, ","))) {
10443 early_enable_events(tr, tok, true);
10444 }
10445 }
10446 }
10447
tracer_alloc_buffers(void)10448 __init static int tracer_alloc_buffers(void)
10449 {
10450 int ring_buf_size;
10451 int ret = -ENOMEM;
10452
10453
10454 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10455 pr_warn("Tracing disabled due to lockdown\n");
10456 return -EPERM;
10457 }
10458
10459 /*
10460 * Make sure we don't accidentally add more trace options
10461 * than we have bits for.
10462 */
10463 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10464
10465 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10466 goto out;
10467
10468 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10469 goto out_free_buffer_mask;
10470
10471 /* Only allocate trace_printk buffers if a trace_printk exists */
10472 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10473 /* Must be called before global_trace.buffer is allocated */
10474 trace_printk_init_buffers();
10475
10476 /* To save memory, keep the ring buffer size to its minimum */
10477 if (ring_buffer_expanded)
10478 ring_buf_size = trace_buf_size;
10479 else
10480 ring_buf_size = 1;
10481
10482 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10483 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10484
10485 raw_spin_lock_init(&global_trace.start_lock);
10486
10487 /*
10488 * The prepare callbacks allocates some memory for the ring buffer. We
10489 * don't free the buffer if the CPU goes down. If we were to free
10490 * the buffer, then the user would lose any trace that was in the
10491 * buffer. The memory will be removed once the "instance" is removed.
10492 */
10493 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10494 "trace/RB:prepare", trace_rb_cpu_prepare,
10495 NULL);
10496 if (ret < 0)
10497 goto out_free_cpumask;
10498 /* Used for event triggers */
10499 ret = -ENOMEM;
10500 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10501 if (!temp_buffer)
10502 goto out_rm_hp_state;
10503
10504 if (trace_create_savedcmd() < 0)
10505 goto out_free_temp_buffer;
10506
10507 if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10508 goto out_free_savedcmd;
10509
10510 /* TODO: make the number of buffers hot pluggable with CPUS */
10511 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10512 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10513 goto out_free_pipe_cpumask;
10514 }
10515 if (global_trace.buffer_disabled)
10516 tracing_off();
10517
10518 if (trace_boot_clock) {
10519 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10520 if (ret < 0)
10521 pr_warn("Trace clock %s not defined, going back to default\n",
10522 trace_boot_clock);
10523 }
10524
10525 /*
10526 * register_tracer() might reference current_trace, so it
10527 * needs to be set before we register anything. This is
10528 * just a bootstrap of current_trace anyway.
10529 */
10530 global_trace.current_trace = &nop_trace;
10531
10532 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10533
10534 ftrace_init_global_array_ops(&global_trace);
10535
10536 init_trace_flags_index(&global_trace);
10537
10538 register_tracer(&nop_trace);
10539
10540 /* Function tracing may start here (via kernel command line) */
10541 init_function_trace();
10542
10543 /* All seems OK, enable tracing */
10544 tracing_disabled = 0;
10545
10546 atomic_notifier_chain_register(&panic_notifier_list,
10547 &trace_panic_notifier);
10548
10549 register_die_notifier(&trace_die_notifier);
10550
10551 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10552
10553 INIT_LIST_HEAD(&global_trace.systems);
10554 INIT_LIST_HEAD(&global_trace.events);
10555 INIT_LIST_HEAD(&global_trace.hist_vars);
10556 INIT_LIST_HEAD(&global_trace.err_log);
10557 list_add(&global_trace.list, &ftrace_trace_arrays);
10558
10559 apply_trace_boot_options();
10560
10561 register_snapshot_cmd();
10562
10563 test_can_verify();
10564
10565 return 0;
10566
10567 out_free_pipe_cpumask:
10568 free_cpumask_var(global_trace.pipe_cpumask);
10569 out_free_savedcmd:
10570 free_saved_cmdlines_buffer(savedcmd);
10571 out_free_temp_buffer:
10572 ring_buffer_free(temp_buffer);
10573 out_rm_hp_state:
10574 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10575 out_free_cpumask:
10576 free_cpumask_var(global_trace.tracing_cpumask);
10577 out_free_buffer_mask:
10578 free_cpumask_var(tracing_buffer_mask);
10579 out:
10580 return ret;
10581 }
10582
ftrace_boot_snapshot(void)10583 void __init ftrace_boot_snapshot(void)
10584 {
10585 #ifdef CONFIG_TRACER_MAX_TRACE
10586 struct trace_array *tr;
10587
10588 if (!snapshot_at_boot)
10589 return;
10590
10591 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10592 if (!tr->allocated_snapshot)
10593 continue;
10594
10595 tracing_snapshot_instance(tr);
10596 trace_array_puts(tr, "** Boot snapshot taken **\n");
10597 }
10598 #endif
10599 }
10600
early_trace_init(void)10601 void __init early_trace_init(void)
10602 {
10603 if (tracepoint_printk) {
10604 tracepoint_print_iter =
10605 kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10606 if (MEM_FAIL(!tracepoint_print_iter,
10607 "Failed to allocate trace iterator\n"))
10608 tracepoint_printk = 0;
10609 else
10610 static_key_enable(&tracepoint_printk_key.key);
10611 }
10612 tracer_alloc_buffers();
10613
10614 init_events();
10615 }
10616
trace_init(void)10617 void __init trace_init(void)
10618 {
10619 trace_event_init();
10620
10621 if (boot_instance_index)
10622 enable_instances();
10623 }
10624
clear_boot_tracer(void)10625 __init static void clear_boot_tracer(void)
10626 {
10627 /*
10628 * The default tracer at boot buffer is an init section.
10629 * This function is called in lateinit. If we did not
10630 * find the boot tracer, then clear it out, to prevent
10631 * later registration from accessing the buffer that is
10632 * about to be freed.
10633 */
10634 if (!default_bootup_tracer)
10635 return;
10636
10637 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10638 default_bootup_tracer);
10639 default_bootup_tracer = NULL;
10640 }
10641
10642 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
tracing_set_default_clock(void)10643 __init static void tracing_set_default_clock(void)
10644 {
10645 /* sched_clock_stable() is determined in late_initcall */
10646 if (!trace_boot_clock && !sched_clock_stable()) {
10647 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10648 pr_warn("Can not set tracing clock due to lockdown\n");
10649 return;
10650 }
10651
10652 printk(KERN_WARNING
10653 "Unstable clock detected, switching default tracing clock to \"global\"\n"
10654 "If you want to keep using the local clock, then add:\n"
10655 " \"trace_clock=local\"\n"
10656 "on the kernel command line\n");
10657 tracing_set_clock(&global_trace, "global");
10658 }
10659 }
10660 #else
/* Stub used when CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is not set; does nothing. */
static inline void tracing_set_default_clock(void)
{
}
10662 #endif
10663
late_trace_init(void)10664 __init static int late_trace_init(void)
10665 {
10666 if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10667 static_key_disable(&tracepoint_printk_key.key);
10668 tracepoint_printk = 0;
10669 }
10670
10671 tracing_set_default_clock();
10672 clear_boot_tracer();
10673 return 0;
10674 }
10675
10676 late_initcall_sync(late_trace_init);
10677