1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/kmemleak.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49
50 #include "trace.h"
51 #include "trace_output.h"
52
53 /*
54 * On boot up, the ring buffer is set to the minimum size, so that
55 * we do not waste memory on systems that are not using tracing.
56 */
57 bool ring_buffer_expanded;
58
59 /*
60 * We need to change this state when a selftest is running.
61 * A selftest will look into the ring buffer to count the
62 * entries inserted during the selftest, although concurrent
63 * insertions into the ring buffer, such as trace_printk, could occur
64 * at the same time, giving false positive or negative results.
65 */
66 static bool __read_mostly tracing_selftest_running;
67
68 /*
69 * If a tracer is running, we do not want to run SELFTEST.
70 */
71 bool __read_mostly tracing_selftest_disabled;
72
73 /* Pipe tracepoints to printk */
74 struct trace_iterator *tracepoint_print_iter;
75 int tracepoint_printk;
76 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
77
78 /* For tracers that don't implement custom flags */
79 static struct tracer_opt dummy_tracer_opt[] = {
80 { }
81 };
82
83 static int
84 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
85 {
86 return 0;
87 }
88
89 /*
90 * To prevent the comm cache from being overwritten when no
91 * tracing is active, only save the comm when a trace event
92 * occurred.
93 */
94 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
95
96 /*
97 * Kill all tracing for good (never come back).
98 * It is initialized to 1 but will turn to zero if the initialization
99 * of the tracer is successful. But that is the only place that sets
100 * this back to zero.
101 */
102 static int tracing_disabled = 1;
103
104 cpumask_var_t __read_mostly tracing_buffer_mask;
105
106 /*
107 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
108 *
109 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
110 * is set, then ftrace_dump is called. This will output the contents
111 * of the ftrace buffers to the console. This is very useful for
112 * capturing traces that lead to crashes and outputting them to a
113 * serial console.
114 *
115 * It is off by default, but you can enable it either by specifying
116 * "ftrace_dump_on_oops" on the kernel command line, or by setting
117 * /proc/sys/kernel/ftrace_dump_on_oops.
118 * Set it to 1 to dump the buffers of all CPUs.
119 * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
120 */
121
122 enum ftrace_dump_mode ftrace_dump_on_oops;
123
124 /* When set, tracing will stop when a WARN*() is hit */
125 int __disable_trace_on_warning;
126
127 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
128 /* Map of enums to their values, for "eval_map" file */
129 struct trace_eval_map_head {
130 struct module *mod;
131 unsigned long length;
132 };
133
134 union trace_eval_map_item;
135
136 struct trace_eval_map_tail {
137 /*
138 * "end" is first and points to NULL as it must be different
139 * than "mod" or "eval_string"
140 */
141 union trace_eval_map_item *next;
142 const char *end; /* points to NULL */
143 };
144
145 static DEFINE_MUTEX(trace_eval_mutex);
146
147 /*
148 * The trace_eval_maps are saved in an array with two extra elements,
149 * one at the beginning, and one at the end. The beginning item contains
150 * the count of the saved maps (head.length), and the module they
151 * belong to if not built in (head.mod). The ending item contains a
152 * pointer to the next array of saved eval_map items.
153 */
154 union trace_eval_map_item {
155 struct trace_eval_map map;
156 struct trace_eval_map_head head;
157 struct trace_eval_map_tail tail;
158 };
159
160 static union trace_eval_map_item *trace_eval_maps;
161 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
162
163 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
164 static void ftrace_trace_userstack(struct trace_array *tr,
165 struct ring_buffer *buffer,
166 unsigned long flags, int pc);
167
168 #define MAX_TRACER_SIZE 100
169 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
170 static char *default_bootup_tracer;
171
172 static bool allocate_snapshot;
173
174 static int __init set_cmdline_ftrace(char *str)
175 {
176 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
177 default_bootup_tracer = bootup_tracer_buf;
178 /* We are using ftrace early, expand it */
179 ring_buffer_expanded = true;
180 return 1;
181 }
182 __setup("ftrace=", set_cmdline_ftrace);
183
184 static int __init set_ftrace_dump_on_oops(char *str)
185 {
186 if (*str++ != '=' || !*str) {
187 ftrace_dump_on_oops = DUMP_ALL;
188 return 1;
189 }
190
191 if (!strcmp("orig_cpu", str)) {
192 ftrace_dump_on_oops = DUMP_ORIG;
193 return 1;
194 }
195
196 return 0;
197 }
198 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
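 
/*
 * Example (command-line usage, as parsed by set_ftrace_dump_on_oops() above):
 *
 *	ftrace_dump_on_oops		dump the buffers of all CPUs (DUMP_ALL)
 *	ftrace_dump_on_oops=orig_cpu	dump only the buffer of the CPU that
 *					triggered the oops (DUMP_ORIG)
 */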
199
200 static int __init stop_trace_on_warning(char *str)
201 {
202 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
203 __disable_trace_on_warning = 1;
204 return 1;
205 }
206 __setup("traceoff_on_warning", stop_trace_on_warning);
207
208 static int __init boot_alloc_snapshot(char *str)
209 {
210 allocate_snapshot = true;
211 /* We also need the main ring buffer expanded */
212 ring_buffer_expanded = true;
213 return 1;
214 }
215 __setup("alloc_snapshot", boot_alloc_snapshot);
216
217
218 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
219
220 static int __init set_trace_boot_options(char *str)
221 {
222 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
223 return 1;
224 }
225 __setup("trace_options=", set_trace_boot_options);
226
227 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
228 static char *trace_boot_clock __initdata;
229
230 static int __init set_trace_boot_clock(char *str)
231 {
232 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
233 trace_boot_clock = trace_boot_clock_buf;
234 return 1;
235 }
236 __setup("trace_clock=", set_trace_boot_clock);
237
238 static int __init set_tracepoint_printk(char *str)
239 {
240 /* Ignore the "tp_printk_stop_on_boot" param */
241 if (*str == '_')
242 return 0;
243
244 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
245 tracepoint_printk = 1;
246 return 1;
247 }
248 __setup("tp_printk", set_tracepoint_printk);
249
250 unsigned long long ns2usecs(u64 nsec)
251 {
252 nsec += 500;
253 do_div(nsec, 1000);
254 return nsec;
255 }
256
257 /* trace_flags holds trace_options default values */
258 #define TRACE_DEFAULT_FLAGS \
259 (FUNCTION_DEFAULT_FLAGS | \
260 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
261 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
262 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
263 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
264
265 /* trace_options that are only supported by global_trace */
266 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
267 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
268
269 /* trace_flags that are default zero for instances */
270 #define ZEROED_TRACE_FLAGS \
271 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
272
273 /*
274 * The global_trace is the descriptor that holds the top-level tracing
275 * buffers for the live tracing.
276 */
277 static struct trace_array global_trace = {
278 .trace_flags = TRACE_DEFAULT_FLAGS,
279 };
280
281 LIST_HEAD(ftrace_trace_arrays);
282
283 int trace_array_get(struct trace_array *this_tr)
284 {
285 struct trace_array *tr;
286 int ret = -ENODEV;
287
288 mutex_lock(&trace_types_lock);
289 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
290 if (tr == this_tr) {
291 tr->ref++;
292 ret = 0;
293 break;
294 }
295 }
296 mutex_unlock(&trace_types_lock);
297
298 return ret;
299 }
300
301 static void __trace_array_put(struct trace_array *this_tr)
302 {
303 WARN_ON(!this_tr->ref);
304 this_tr->ref--;
305 }
306
307 void trace_array_put(struct trace_array *this_tr)
308 {
309 mutex_lock(&trace_types_lock);
310 __trace_array_put(this_tr);
311 mutex_unlock(&trace_types_lock);
312 }
313
314 int tracing_check_open_get_tr(struct trace_array *tr)
315 {
316 int ret;
317
318 ret = security_locked_down(LOCKDOWN_TRACEFS);
319 if (ret)
320 return ret;
321
322 if (tracing_disabled)
323 return -ENODEV;
324
325 if (tr && trace_array_get(tr) < 0)
326 return -ENODEV;
327
328 return 0;
329 }
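 
/*
 * Illustrative sketch (not part of this file): a typical tracefs open/release
 * pair that uses tracing_check_open_get_tr() and trace_array_put(). The
 * example_open()/example_release() names and the use of inode->i_private to
 * carry the trace_array are assumptions for illustration only.
 *
 *	static int example_open(struct inode *inode, struct file *filp)
 *	{
 *		struct trace_array *tr = inode->i_private;
 *		int ret;
 *
 *		ret = tracing_check_open_get_tr(tr);
 *		if (ret)
 *			return ret;
 *
 *		filp->private_data = tr;
 *		return 0;
 *	}
 *
 *	static int example_release(struct inode *inode, struct file *filp)
 *	{
 *		trace_array_put((struct trace_array *)inode->i_private);
 *		return 0;
 *	}
 */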
330
331 int call_filter_check_discard(struct trace_event_call *call, void *rec,
332 struct ring_buffer *buffer,
333 struct ring_buffer_event *event)
334 {
335 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
336 !filter_match_preds(call->filter, rec)) {
337 __trace_event_discard_commit(buffer, event);
338 return 1;
339 }
340
341 return 0;
342 }
343
344 void trace_free_pid_list(struct trace_pid_list *pid_list)
345 {
346 vfree(pid_list->pids);
347 kfree(pid_list);
348 }
349
350 /**
351 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
352 * @filtered_pids: The list of pids to check
353 * @search_pid: The PID to find in @filtered_pids
354 *
355 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
356 */
357 bool
358 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
359 {
360 /*
361 * If pid_max changed after filtered_pids was created, we
362 * by default ignore all pids greater than the previous pid_max.
363 */
364 if (search_pid >= filtered_pids->pid_max)
365 return false;
366
367 return test_bit(search_pid, filtered_pids->pids);
368 }
369
370 /**
371 * trace_ignore_this_task - should a task be ignored for tracing
372 * @filtered_pids: The list of pids to check
373 * @task: The task that should be ignored if not filtered
374 *
375 * Checks if @task should be traced or not from @filtered_pids.
376 * Returns true if @task should *NOT* be traced.
377 * Returns false if @task should be traced.
378 */
379 bool
380 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
381 {
382 /*
383 * Return false, because if filtered_pids does not exist,
384 * all pids are good to trace.
385 */
386 if (!filtered_pids)
387 return false;
388
389 return !trace_find_filtered_pid(filtered_pids, task->pid);
390 }
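 
/*
 * Illustrative sketch (not part of this file): how a tracing hook would
 * typically consult the pid filter before recording an event. The
 * example_should_trace() helper and the way the pid list pointer is obtained
 * are assumptions for illustration only; real callers fetch the list from
 * their trace_array under RCU.
 *
 *	static bool example_should_trace(struct trace_pid_list *pid_list,
 *					 struct task_struct *task)
 *	{
 *		return !trace_ignore_this_task(pid_list, task);
 *	}
 */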
391
392 /**
393 * trace_filter_add_remove_task - Add or remove a task from a pid_list
394 * @pid_list: The list to modify
395 * @self: The current task for fork or NULL for exit
396 * @task: The task to add or remove
397 *
398 * If adding a task, if @self is defined, the task is only added if @self
399 * is also included in @pid_list. This happens on fork and tasks should
400 * only be added when the parent is listed. If @self is NULL, then the
401 * @task pid will be removed from the list, which would happen on exit
402 * of a task.
403 */
404 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
405 struct task_struct *self,
406 struct task_struct *task)
407 {
408 if (!pid_list)
409 return;
410
411 /* For forks, we only add if the forking task is listed */
412 if (self) {
413 if (!trace_find_filtered_pid(pid_list, self->pid))
414 return;
415 }
416
417 /* Sorry, but we don't support pid_max changing after setting */
418 if (task->pid >= pid_list->pid_max)
419 return;
420
421 /* "self" is set for forks, and NULL for exits */
422 if (self)
423 set_bit(task->pid, pid_list->pids);
424 else
425 clear_bit(task->pid, pid_list->pids);
426 }
427
428 /**
429 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
430 * @pid_list: The pid list to show
431 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
432 * @pos: The position of the file
433 *
434 * This is used by the seq_file "next" operation to iterate the pids
435 * listed in a trace_pid_list structure.
436 *
437 * Returns the pid+1 as we want to display pid of zero, but NULL would
438 * stop the iteration.
439 */
440 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
441 {
442 unsigned long pid = (unsigned long)v;
443
444 (*pos)++;
445
446 /* pid already is +1 of the actual previous bit */
447 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
448
449 /* Return pid + 1 to allow zero to be represented */
450 if (pid < pid_list->pid_max)
451 return (void *)(pid + 1);
452
453 return NULL;
454 }
455
456 /**
457 * trace_pid_start - Used for seq_file to start reading pid lists
458 * @pid_list: The pid list to show
459 * @pos: The position of the file
460 *
461 * This is used by seq_file "start" operation to start the iteration
462 * of listing pids.
463 *
464 * Returns the pid+1 as we want to display pid of zero, but NULL would
465 * stop the iteration.
466 */
467 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
468 {
469 unsigned long pid;
470 loff_t l = 0;
471
472 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
473 if (pid >= pid_list->pid_max)
474 return NULL;
475
476 /* Return pid + 1 so that zero can be the exit value */
477 for (pid++; pid && l < *pos;
478 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
479 ;
480 return (void *)pid;
481 }
482
483 /**
484 * trace_pid_show - show the current pid in seq_file processing
485 * @m: The seq_file structure to write into
486 * @v: A void pointer of the pid (+1) value to display
487 *
488 * Can be directly used by seq_file operations to display the current
489 * pid value.
490 */
491 int trace_pid_show(struct seq_file *m, void *v)
492 {
493 unsigned long pid = (unsigned long)v - 1;
494
495 seq_printf(m, "%lu\n", pid);
496 return 0;
497 }
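 
/*
 * Illustrative sketch (not part of this file): wiring the three helpers above
 * into a seq_file. The example_pids_* names and the empty stop() callback are
 * assumptions for illustration only; real users (e.g. the event pid filter
 * files) also take the proper locks and resolve the pid_list pointer under
 * RCU in their start()/stop() callbacks.
 *
 *	static void *example_pids_start(struct seq_file *m, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;
 *
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(m->private, v, pos);
 *	}
 *
 *	static void example_pids_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations example_pids_sops = {
 *		.start	= example_pids_start,
 *		.next	= example_pids_next,
 *		.stop	= example_pids_stop,
 *		.show	= trace_pid_show,
 *	};
 */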
498
499 /* 128 should be much more than enough */
500 #define PID_BUF_SIZE 127
501
502 int trace_pid_write(struct trace_pid_list *filtered_pids,
503 struct trace_pid_list **new_pid_list,
504 const char __user *ubuf, size_t cnt)
505 {
506 struct trace_pid_list *pid_list;
507 struct trace_parser parser;
508 unsigned long val;
509 int nr_pids = 0;
510 ssize_t read = 0;
511 ssize_t ret = 0;
512 loff_t pos;
513 pid_t pid;
514
515 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
516 return -ENOMEM;
517
518 /*
519 * Always recreate a new array. The write is an all or nothing
520 * operation. Always create a new array when adding new pids by
521 * the user. If the operation fails, then the current list is
522 * not modified.
523 */
524 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
525 if (!pid_list) {
526 trace_parser_put(&parser);
527 return -ENOMEM;
528 }
529
530 pid_list->pid_max = READ_ONCE(pid_max);
531
532 /* Only truncating will shrink pid_max */
533 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
534 pid_list->pid_max = filtered_pids->pid_max;
535
536 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
537 if (!pid_list->pids) {
538 trace_parser_put(&parser);
539 kfree(pid_list);
540 return -ENOMEM;
541 }
542
543 if (filtered_pids) {
544 /* copy the current bits to the new max */
545 for_each_set_bit(pid, filtered_pids->pids,
546 filtered_pids->pid_max) {
547 set_bit(pid, pid_list->pids);
548 nr_pids++;
549 }
550 }
551
552 while (cnt > 0) {
553
554 pos = 0;
555
556 ret = trace_get_user(&parser, ubuf, cnt, &pos);
557 if (ret < 0 || !trace_parser_loaded(&parser))
558 break;
559
560 read += ret;
561 ubuf += ret;
562 cnt -= ret;
563
564 ret = -EINVAL;
565 if (kstrtoul(parser.buffer, 0, &val))
566 break;
567 if (val >= pid_list->pid_max)
568 break;
569
570 pid = (pid_t)val;
571
572 set_bit(pid, pid_list->pids);
573 nr_pids++;
574
575 trace_parser_clear(&parser);
576 ret = 0;
577 }
578 trace_parser_put(&parser);
579
580 if (ret < 0) {
581 trace_free_pid_list(pid_list);
582 return ret;
583 }
584
585 if (!nr_pids) {
586 /* Cleared the list of pids */
587 trace_free_pid_list(pid_list);
588 read = ret;
589 pid_list = NULL;
590 }
591
592 *new_pid_list = pid_list;
593
594 return read;
595 }
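 
/*
 * Illustrative sketch (not part of this file): how a write handler would
 * typically build a new pid list from user input and install it. The
 * example_set_pids() name is an assumption for illustration only, and the
 * plain pointer assignment stands in for the rcu_assign_pointer() plus
 * synchronization that real callers perform before freeing the old list.
 *
 *	static ssize_t example_set_pids(struct trace_pid_list **filtered_pids,
 *					const char __user *ubuf, size_t cnt)
 *	{
 *		struct trace_pid_list *new_list = NULL;
 *		ssize_t ret;
 *
 *		ret = trace_pid_write(*filtered_pids, &new_list, ubuf, cnt);
 *		if (ret < 0)
 *			return ret;
 *
 *		*filtered_pids = new_list;
 *		return ret;
 *	}
 */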
596
597 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
598 {
599 u64 ts;
600
601 /* Early boot up does not have a buffer yet */
602 if (!buf->buffer)
603 return trace_clock_local();
604
605 ts = ring_buffer_time_stamp(buf->buffer, cpu);
606 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
607
608 return ts;
609 }
610
611 u64 ftrace_now(int cpu)
612 {
613 return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
614 }
615
616 /**
617 * tracing_is_enabled - Show if global_trace has been disabled
618 *
619 * Shows if the global trace has been enabled or not. It uses the
620 * mirror flag "buffer_disabled" to be used in fast paths such as for
621 * the irqsoff tracer. But it may be inaccurate due to races. If you
622 * need to know the accurate state, use tracing_is_on() which is a little
623 * slower, but accurate.
624 */
625 int tracing_is_enabled(void)
626 {
627 /*
628 * For quick access (irqsoff uses this in fast path), just
629 * return the mirror variable of the state of the ring buffer.
630 * It's a little racy, but we don't really care.
631 */
632 smp_rmb();
633 return !global_trace.buffer_disabled;
634 }
635
636 /*
637 * trace_buf_size is the size in bytes that is allocated
638 * for a buffer. Note, the number of bytes is always rounded
639 * to page size.
640 *
641 * This number is purposely set to a low number of 16384.
642 * If the dump on oops happens, it will be much appreciated
643 * to not have to wait for all that output. Anyway this can be
644 * boot time and run time configurable.
645 */
646 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
647
648 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
649
650 /* trace_types holds a link list of available tracers. */
651 static struct tracer *trace_types __read_mostly;
652
653 /*
654 * trace_types_lock is used to protect the trace_types list.
655 */
656 DEFINE_MUTEX(trace_types_lock);
657
658 /*
659 * serialize the access of the ring buffer
660 *
661 * The ring buffer serializes readers, but that is only low-level protection.
662 * The validity of the events (returned by ring_buffer_peek() etc.)
663 * is not protected by the ring buffer.
664 *
665 * The content of events may become garbage if we allow another process to
666 * consume these events concurrently:
667 * A) the page of the consumed events may become a normal page
668 * (not a reader page) in the ring buffer, and this page will be rewritten
669 * by the event producer.
670 * B) the page of the consumed events may become a page for splice_read,
671 * and this page will be returned to the system.
672 *
673 * These primitives allow multi-process access to different cpu ring buffers
674 * concurrently.
675 *
676 * These primitives don't distinguish read-only and read-consume access.
677 * Multiple read-only accesses are also serialized.
678 */
679
680 #ifdef CONFIG_SMP
681 static DECLARE_RWSEM(all_cpu_access_lock);
682 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
683
684 static inline void trace_access_lock(int cpu)
685 {
686 if (cpu == RING_BUFFER_ALL_CPUS) {
687 /* gain it for accessing the whole ring buffer. */
688 down_write(&all_cpu_access_lock);
689 } else {
690 /* gain it for accessing a cpu ring buffer. */
691
692 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
693 down_read(&all_cpu_access_lock);
694
695 /* Secondly block other access to this @cpu ring buffer. */
696 mutex_lock(&per_cpu(cpu_access_lock, cpu));
697 }
698 }
699
700 static inline void trace_access_unlock(int cpu)
701 {
702 if (cpu == RING_BUFFER_ALL_CPUS) {
703 up_write(&all_cpu_access_lock);
704 } else {
705 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
706 up_read(&all_cpu_access_lock);
707 }
708 }
709
710 static inline void trace_access_lock_init(void)
711 {
712 int cpu;
713
714 for_each_possible_cpu(cpu)
715 mutex_init(&per_cpu(cpu_access_lock, cpu));
716 }
717
718 #else
719
720 static DEFINE_MUTEX(access_lock);
721
722 static inline void trace_access_lock(int cpu)
723 {
724 (void)cpu;
725 mutex_lock(&access_lock);
726 }
727
728 static inline void trace_access_unlock(int cpu)
729 {
730 (void)cpu;
731 mutex_unlock(&access_lock);
732 }
733
734 static inline void trace_access_lock_init(void)
735 {
736 }
737
738 #endif
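 
/*
 * Illustrative sketch (not part of this file): the reader-side pattern the
 * primitives above are meant for. The example_read_cpu() name and the body
 * of the critical section are assumptions for illustration only.
 *
 *	static void example_read_cpu(int cpu)
 *	{
 *		trace_access_lock(cpu);
 *		... read or consume events from @cpu's ring buffer ...
 *		trace_access_unlock(cpu);
 *	}
 */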
739
740 #ifdef CONFIG_STACKTRACE
741 static void __ftrace_trace_stack(struct ring_buffer *buffer,
742 unsigned long flags,
743 int skip, int pc, struct pt_regs *regs);
744 static inline void ftrace_trace_stack(struct trace_array *tr,
745 struct ring_buffer *buffer,
746 unsigned long flags,
747 int skip, int pc, struct pt_regs *regs);
748
749 #else
750 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
751 unsigned long flags,
752 int skip, int pc, struct pt_regs *regs)
753 {
754 }
755 static inline void ftrace_trace_stack(struct trace_array *tr,
756 struct ring_buffer *buffer,
757 unsigned long flags,
758 int skip, int pc, struct pt_regs *regs)
759 {
760 }
761
762 #endif
763
764 static __always_inline void
765 trace_event_setup(struct ring_buffer_event *event,
766 int type, unsigned long flags, int pc)
767 {
768 struct trace_entry *ent = ring_buffer_event_data(event);
769
770 tracing_generic_entry_update(ent, type, flags, pc);
771 }
772
773 static __always_inline struct ring_buffer_event *
774 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
775 int type,
776 unsigned long len,
777 unsigned long flags, int pc)
778 {
779 struct ring_buffer_event *event;
780
781 event = ring_buffer_lock_reserve(buffer, len);
782 if (event != NULL)
783 trace_event_setup(event, type, flags, pc);
784
785 return event;
786 }
787
788 void tracer_tracing_on(struct trace_array *tr)
789 {
790 if (tr->trace_buffer.buffer)
791 ring_buffer_record_on(tr->trace_buffer.buffer);
792 /*
793 * This flag is looked at when buffers haven't been allocated
794 * yet, or by some tracers (like irqsoff), that just want to
795 * know if the ring buffer has been disabled, but it can handle
796 * races of where it gets disabled but we still do a record.
797 * As the check is in the fast path of the tracers, it is more
798 * important to be fast than accurate.
799 */
800 tr->buffer_disabled = 0;
801 /* Make the flag seen by readers */
802 smp_wmb();
803 }
804
805 /**
806 * tracing_on - enable tracing buffers
807 *
808 * This function enables tracing buffers that may have been
809 * disabled with tracing_off.
810 */
811 void tracing_on(void)
812 {
813 tracer_tracing_on(&global_trace);
814 }
815 EXPORT_SYMBOL_GPL(tracing_on);
816
817
818 static __always_inline void
819 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
820 {
821 __this_cpu_write(trace_taskinfo_save, true);
822
823 /* If this is the temp buffer, we need to commit fully */
824 if (this_cpu_read(trace_buffered_event) == event) {
825 /* Length is in event->array[0] */
826 ring_buffer_write(buffer, event->array[0], &event->array[1]);
827 /* Release the temp buffer */
828 this_cpu_dec(trace_buffered_event_cnt);
829 } else
830 ring_buffer_unlock_commit(buffer, event);
831 }
832
833 /**
834 * __trace_puts - write a constant string into the trace buffer.
835 * @ip: The address of the caller
836 * @str: The constant string to write
837 * @size: The size of the string.
838 */
839 int __trace_puts(unsigned long ip, const char *str, int size)
840 {
841 struct ring_buffer_event *event;
842 struct ring_buffer *buffer;
843 struct print_entry *entry;
844 unsigned long irq_flags;
845 int alloc;
846 int pc;
847
848 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
849 return 0;
850
851 pc = preempt_count();
852
853 if (unlikely(tracing_selftest_running || tracing_disabled))
854 return 0;
855
856 alloc = sizeof(*entry) + size + 2; /* possible \n added */
857
858 local_save_flags(irq_flags);
859 buffer = global_trace.trace_buffer.buffer;
860 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
861 irq_flags, pc);
862 if (!event)
863 return 0;
864
865 entry = ring_buffer_event_data(event);
866 entry->ip = ip;
867
868 memcpy(&entry->buf, str, size);
869
870 /* Add a newline if necessary */
871 if (entry->buf[size - 1] != '\n') {
872 entry->buf[size] = '\n';
873 entry->buf[size + 1] = '\0';
874 } else
875 entry->buf[size] = '\0';
876
877 __buffer_unlock_commit(buffer, event);
878 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
879
880 return size;
881 }
882 EXPORT_SYMBOL_GPL(__trace_puts);
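 
/*
 * Note: callers normally do not use __trace_puts() directly; the trace_puts()
 * macro (declared with the tracing helpers in the core headers) picks between
 * __trace_puts() and __trace_bputs() depending on whether the string is a
 * build-time constant. A minimal, illustrative use:
 *
 *	trace_puts("reached the fast path\n");
 */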
883
884 /**
885 * __trace_bputs - write the pointer to a constant string into trace buffer
886 * @ip: The address of the caller
887 * @str: The constant string to write to the buffer to
888 */
889 int __trace_bputs(unsigned long ip, const char *str)
890 {
891 struct ring_buffer_event *event;
892 struct ring_buffer *buffer;
893 struct bputs_entry *entry;
894 unsigned long irq_flags;
895 int size = sizeof(struct bputs_entry);
896 int pc;
897
898 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
899 return 0;
900
901 pc = preempt_count();
902
903 if (unlikely(tracing_selftest_running || tracing_disabled))
904 return 0;
905
906 local_save_flags(irq_flags);
907 buffer = global_trace.trace_buffer.buffer;
908 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
909 irq_flags, pc);
910 if (!event)
911 return 0;
912
913 entry = ring_buffer_event_data(event);
914 entry->ip = ip;
915 entry->str = str;
916
917 __buffer_unlock_commit(buffer, event);
918 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
919
920 return 1;
921 }
922 EXPORT_SYMBOL_GPL(__trace_bputs);
923
924 #ifdef CONFIG_TRACER_SNAPSHOT
925 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
926 {
927 struct tracer *tracer = tr->current_trace;
928 unsigned long flags;
929
930 if (in_nmi()) {
931 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
932 internal_trace_puts("*** snapshot is being ignored ***\n");
933 return;
934 }
935
936 if (!tr->allocated_snapshot) {
937 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
938 internal_trace_puts("*** stopping trace here! ***\n");
939 tracing_off();
940 return;
941 }
942
943 /* Note, snapshot can not be used when the tracer uses it */
944 if (tracer->use_max_tr) {
945 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
946 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
947 return;
948 }
949
950 local_irq_save(flags);
951 update_max_tr(tr, current, smp_processor_id(), cond_data);
952 local_irq_restore(flags);
953 }
954
955 void tracing_snapshot_instance(struct trace_array *tr)
956 {
957 tracing_snapshot_instance_cond(tr, NULL);
958 }
959
960 /**
961 * tracing_snapshot - take a snapshot of the current buffer.
962 *
963 * This causes a swap between the snapshot buffer and the current live
964 * tracing buffer. You can use this to take snapshots of the live
965 * trace when some condition is triggered, but continue to trace.
966 *
967 * Note, make sure to allocate the snapshot with either
968 * a tracing_snapshot_alloc(), or by doing it manually
969 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
970 *
971 * If the snapshot buffer is not allocated, it will stop tracing.
972 * Basically making a permanent snapshot.
973 */
974 void tracing_snapshot(void)
975 {
976 struct trace_array *tr = &global_trace;
977
978 tracing_snapshot_instance(tr);
979 }
980 EXPORT_SYMBOL_GPL(tracing_snapshot);
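 
/*
 * Illustrative sketch (not part of this file): the usual pairing of the
 * snapshot API from kernel code. The example_hit_condition() name is an
 * assumption for illustration only.
 *
 *	// at init time, where sleeping is allowed
 *	tracing_alloc_snapshot();
 *
 *	// later, in the code path being debugged
 *	if (example_hit_condition())
 *		tracing_snapshot();
 */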
981
982 /**
983 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
984 * @tr: The tracing instance to snapshot
985 * @cond_data: The data to be tested conditionally, and possibly saved
986 *
987 * This is the same as tracing_snapshot() except that the snapshot is
988 * conditional - the snapshot will only happen if the
989 * cond_snapshot.update() implementation receiving the cond_data
990 * returns true, which means that the trace array's cond_snapshot
991 * update() operation used the cond_data to determine whether the
992 * snapshot should be taken, and if it was, presumably saved it along
993 * with the snapshot.
994 */
995 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
996 {
997 tracing_snapshot_instance_cond(tr, cond_data);
998 }
999 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1000
1001 /**
1002 * tracing_snapshot_cond_data - get the user data associated with a snapshot
1003 * @tr: The tracing instance
1004 *
1005 * When the user enables a conditional snapshot using
1006 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1007 * with the snapshot. This accessor is used to retrieve it.
1008 *
1009 * Should not be called from cond_snapshot.update(), since it takes
1010 * the tr->max_lock lock, which the code calling
1011 * cond_snapshot.update() has already taken.
1012 *
1013 * Returns the cond_data associated with the trace array's snapshot.
1014 */
1015 void *tracing_cond_snapshot_data(struct trace_array *tr)
1016 {
1017 void *cond_data = NULL;
1018
1019 local_irq_disable();
1020 arch_spin_lock(&tr->max_lock);
1021
1022 if (tr->cond_snapshot)
1023 cond_data = tr->cond_snapshot->cond_data;
1024
1025 arch_spin_unlock(&tr->max_lock);
1026 local_irq_enable();
1027
1028 return cond_data;
1029 }
1030 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1031
1032 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1033 struct trace_buffer *size_buf, int cpu_id);
1034 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1035
1036 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1037 {
1038 int ret;
1039
1040 if (!tr->allocated_snapshot) {
1041
1042 /* allocate spare buffer */
1043 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1044 &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1045 if (ret < 0)
1046 return ret;
1047
1048 tr->allocated_snapshot = true;
1049 }
1050
1051 return 0;
1052 }
1053
1054 static void free_snapshot(struct trace_array *tr)
1055 {
1056 /*
1057 * We don't free the ring buffer; instead, we resize it because
1058 * the max_tr ring buffer has some state (e.g. ring->clock) and
1059 * we want to preserve it.
1060 */
1061 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1062 set_buffer_entries(&tr->max_buffer, 1);
1063 tracing_reset_online_cpus(&tr->max_buffer);
1064 tr->allocated_snapshot = false;
1065 }
1066
1067 /**
1068 * tracing_alloc_snapshot - allocate snapshot buffer.
1069 *
1070 * This only allocates the snapshot buffer if it isn't already
1071 * allocated - it doesn't also take a snapshot.
1072 *
1073 * This is meant to be used in cases where the snapshot buffer needs
1074 * to be set up for events that can't sleep but need to be able to
1075 * trigger a snapshot.
1076 */
1077 int tracing_alloc_snapshot(void)
1078 {
1079 struct trace_array *tr = &global_trace;
1080 int ret;
1081
1082 ret = tracing_alloc_snapshot_instance(tr);
1083 WARN_ON(ret < 0);
1084
1085 return ret;
1086 }
1087 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1088
1089 /**
1090 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1091 *
1092 * This is similar to tracing_snapshot(), but it will allocate the
1093 * snapshot buffer if it isn't already allocated. Use this only
1094 * where it is safe to sleep, as the allocation may sleep.
1095 *
1096 * This causes a swap between the snapshot buffer and the current live
1097 * tracing buffer. You can use this to take snapshots of the live
1098 * trace when some condition is triggered, but continue to trace.
1099 */
1100 void tracing_snapshot_alloc(void)
1101 {
1102 int ret;
1103
1104 ret = tracing_alloc_snapshot();
1105 if (ret < 0)
1106 return;
1107
1108 tracing_snapshot();
1109 }
1110 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1111
1112 /**
1113 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1114 * @tr: The tracing instance
1115 * @cond_data: User data to associate with the snapshot
1116 * @update: Implementation of the cond_snapshot update function
1117 *
1118 * Check whether the conditional snapshot for the given instance has
1119 * already been enabled, or if the current tracer is already using a
1120 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1121 * save the cond_data and update function inside.
1122 *
1123 * Returns 0 if successful, error otherwise.
1124 */
1125 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1126 cond_update_fn_t update)
1127 {
1128 struct cond_snapshot *cond_snapshot;
1129 int ret = 0;
1130
1131 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1132 if (!cond_snapshot)
1133 return -ENOMEM;
1134
1135 cond_snapshot->cond_data = cond_data;
1136 cond_snapshot->update = update;
1137
1138 mutex_lock(&trace_types_lock);
1139
1140 ret = tracing_alloc_snapshot_instance(tr);
1141 if (ret)
1142 goto fail_unlock;
1143
1144 if (tr->current_trace->use_max_tr) {
1145 ret = -EBUSY;
1146 goto fail_unlock;
1147 }
1148
1149 /*
1150 * The cond_snapshot can only change to NULL without the
1151 * trace_types_lock. We don't care if we race with it going
1152 * to NULL, but we want to make sure that it's not set to
1153 * something other than NULL when we get here, which we can
1154 * do safely with only holding the trace_types_lock and not
1155 * having to take the max_lock.
1156 */
1157 if (tr->cond_snapshot) {
1158 ret = -EBUSY;
1159 goto fail_unlock;
1160 }
1161
1162 local_irq_disable();
1163 arch_spin_lock(&tr->max_lock);
1164 tr->cond_snapshot = cond_snapshot;
1165 arch_spin_unlock(&tr->max_lock);
1166 local_irq_enable();
1167
1168 mutex_unlock(&trace_types_lock);
1169
1170 return ret;
1171
1172 fail_unlock:
1173 mutex_unlock(&trace_types_lock);
1174 kfree(cond_snapshot);
1175 return ret;
1176 }
1177 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
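 
/*
 * Illustrative sketch (not part of this file): enabling a conditional
 * snapshot with a caller-supplied update() callback, then triggering it from
 * a hot path. The example_update() and example_ctx names are assumptions for
 * illustration only.
 *
 *	static bool example_update(struct trace_array *tr, void *cond_data)
 *	{
 *		// return true if the snapshot should be taken
 *		return cond_data != NULL;
 *	}
 *
 *	// setup
 *	ret = tracing_snapshot_cond_enable(tr, example_ctx, example_update);
 *
 *	// hot path
 *	tracing_snapshot_cond(tr, example_ctx);
 *
 *	// teardown
 *	tracing_snapshot_cond_disable(tr);
 */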
1178
1179 /**
1180 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1181 * @tr: The tracing instance
1182 *
1183 * Check whether the conditional snapshot for the given instance is
1184 * enabled; if so, free the cond_snapshot associated with it,
1185 * otherwise return -EINVAL.
1186 *
1187 * Returns 0 if successful, error otherwise.
1188 */
1189 int tracing_snapshot_cond_disable(struct trace_array *tr)
1190 {
1191 int ret = 0;
1192
1193 local_irq_disable();
1194 arch_spin_lock(&tr->max_lock);
1195
1196 if (!tr->cond_snapshot)
1197 ret = -EINVAL;
1198 else {
1199 kfree(tr->cond_snapshot);
1200 tr->cond_snapshot = NULL;
1201 }
1202
1203 arch_spin_unlock(&tr->max_lock);
1204 local_irq_enable();
1205
1206 return ret;
1207 }
1208 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1209 #else
1210 void tracing_snapshot(void)
1211 {
1212 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1213 }
1214 EXPORT_SYMBOL_GPL(tracing_snapshot);
1215 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1216 {
1217 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1218 }
1219 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1220 int tracing_alloc_snapshot(void)
1221 {
1222 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1223 return -ENODEV;
1224 }
1225 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1226 void tracing_snapshot_alloc(void)
1227 {
1228 /* Give warning */
1229 tracing_snapshot();
1230 }
1231 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1232 void *tracing_cond_snapshot_data(struct trace_array *tr)
1233 {
1234 return NULL;
1235 }
1236 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1237 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1238 {
1239 return -ENODEV;
1240 }
1241 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1242 int tracing_snapshot_cond_disable(struct trace_array *tr)
1243 {
1244 return false;
1245 }
1246 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1247 #endif /* CONFIG_TRACER_SNAPSHOT */
1248
1249 void tracer_tracing_off(struct trace_array *tr)
1250 {
1251 if (tr->trace_buffer.buffer)
1252 ring_buffer_record_off(tr->trace_buffer.buffer);
1253 /*
1254 * This flag is looked at when buffers haven't been allocated
1255 * yet, or by some tracers (like irqsoff), that just want to
1256 * know if the ring buffer has been disabled, but it can handle
1257 * races of where it gets disabled but we still do a record.
1258 * As the check is in the fast path of the tracers, it is more
1259 * important to be fast than accurate.
1260 */
1261 tr->buffer_disabled = 1;
1262 /* Make the flag seen by readers */
1263 smp_wmb();
1264 }
1265
1266 /**
1267 * tracing_off - turn off tracing buffers
1268 *
1269 * This function stops the tracing buffers from recording data.
1270 * It does not disable any overhead the tracers themselves may
1271 * be causing. This function simply causes all recording to
1272 * the ring buffers to fail.
1273 */
1274 void tracing_off(void)
1275 {
1276 tracer_tracing_off(&global_trace);
1277 }
1278 EXPORT_SYMBOL_GPL(tracing_off);
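 
/*
 * Illustrative sketch (not part of this file): freezing the trace buffers as
 * soon as a problem is detected, so the events leading up to it are preserved
 * for later inspection. The example_detected_problem() name is an assumption
 * for illustration only.
 *
 *	if (example_detected_problem()) {
 *		trace_printk("stopping trace: bad state detected\n");
 *		tracing_off();
 *	}
 */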
1279
1280 void disable_trace_on_warning(void)
1281 {
1282 if (__disable_trace_on_warning)
1283 tracing_off();
1284 }
1285
1286 /**
1287 * tracer_tracing_is_on - show real state of ring buffer enabled
1288 * @tr : the trace array to know if ring buffer is enabled
1289 *
1290 * Shows real state of the ring buffer if it is enabled or not.
1291 */
1292 bool tracer_tracing_is_on(struct trace_array *tr)
1293 {
1294 if (tr->trace_buffer.buffer)
1295 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1296 return !tr->buffer_disabled;
1297 }
1298
1299 /**
1300 * tracing_is_on - show state of ring buffers enabled
1301 */
1302 int tracing_is_on(void)
1303 {
1304 return tracer_tracing_is_on(&global_trace);
1305 }
1306 EXPORT_SYMBOL_GPL(tracing_is_on);
1307
1308 static int __init set_buf_size(char *str)
1309 {
1310 unsigned long buf_size;
1311
1312 if (!str)
1313 return 0;
1314 buf_size = memparse(str, &str);
1315 /*
1316 * nr_entries can not be zero and the startup
1317 * tests require some buffer space. Therefore
1318 * ensure we have at least 4096 bytes of buffer.
1319 */
1320 trace_buf_size = max(4096UL, buf_size);
1321 return 1;
1322 }
1323 __setup("trace_buf_size=", set_buf_size);
1324
1325 static int __init set_tracing_thresh(char *str)
1326 {
1327 unsigned long threshold;
1328 int ret;
1329
1330 if (!str)
1331 return 0;
1332 ret = kstrtoul(str, 0, &threshold);
1333 if (ret < 0)
1334 return 0;
1335 tracing_thresh = threshold * 1000;
1336 return 1;
1337 }
1338 __setup("tracing_thresh=", set_tracing_thresh);
1339
1340 unsigned long nsecs_to_usecs(unsigned long nsecs)
1341 {
1342 return nsecs / 1000;
1343 }
1344
1345 /*
1346 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1347 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1348 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1349 * of strings in the order that the evals (enum) were defined.
1350 */
1351 #undef C
1352 #define C(a, b) b
1353
1354 /* These must match the bit positions in trace_iterator_flags */
1355 static const char *trace_options[] = {
1356 TRACE_FLAGS
1357 NULL
1358 };
1359
1360 static struct {
1361 u64 (*func)(void);
1362 const char *name;
1363 int in_ns; /* is this clock in nanoseconds? */
1364 } trace_clocks[] = {
1365 { trace_clock_local, "local", 1 },
1366 { trace_clock_global, "global", 1 },
1367 { trace_clock_counter, "counter", 0 },
1368 { trace_clock_jiffies, "uptime", 0 },
1369 { trace_clock, "perf", 1 },
1370 { ktime_get_mono_fast_ns, "mono", 1 },
1371 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1372 { ktime_get_boot_fast_ns, "boot", 1 },
1373 ARCH_TRACE_CLOCKS
1374 };
1375
1376 bool trace_clock_in_ns(struct trace_array *tr)
1377 {
1378 if (trace_clocks[tr->clock_id].in_ns)
1379 return true;
1380
1381 return false;
1382 }
1383
1384 /*
1385 * trace_parser_get_init - gets the buffer for trace parser
1386 */
1387 int trace_parser_get_init(struct trace_parser *parser, int size)
1388 {
1389 memset(parser, 0, sizeof(*parser));
1390
1391 parser->buffer = kmalloc(size, GFP_KERNEL);
1392 if (!parser->buffer)
1393 return 1;
1394
1395 parser->size = size;
1396 return 0;
1397 }
1398
1399 /*
1400 * trace_parser_put - frees the buffer for trace parser
1401 */
1402 void trace_parser_put(struct trace_parser *parser)
1403 {
1404 kfree(parser->buffer);
1405 parser->buffer = NULL;
1406 }
1407
1408 /*
1409 * trace_get_user - reads the user input string separated by space
1410 * (matched by isspace(ch))
1411 *
1412 * For each string found the 'struct trace_parser' is updated,
1413 * and the function returns.
1414 *
1415 * Returns number of bytes read.
1416 *
1417 * See kernel/trace/trace.h for 'struct trace_parser' details.
1418 */
1419 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1420 size_t cnt, loff_t *ppos)
1421 {
1422 char ch;
1423 size_t read = 0;
1424 ssize_t ret;
1425
1426 if (!*ppos)
1427 trace_parser_clear(parser);
1428
1429 ret = get_user(ch, ubuf++);
1430 if (ret)
1431 goto out;
1432
1433 read++;
1434 cnt--;
1435
1436 /*
1437 * The parser is not finished with the last write,
1438 * continue reading the user input without skipping spaces.
1439 */
1440 if (!parser->cont) {
1441 /* skip white space */
1442 while (cnt && isspace(ch)) {
1443 ret = get_user(ch, ubuf++);
1444 if (ret)
1445 goto out;
1446 read++;
1447 cnt--;
1448 }
1449
1450 parser->idx = 0;
1451
1452 /* only spaces were written */
1453 if (isspace(ch) || !ch) {
1454 *ppos += read;
1455 ret = read;
1456 goto out;
1457 }
1458 }
1459
1460 /* read the non-space input */
1461 while (cnt && !isspace(ch) && ch) {
1462 if (parser->idx < parser->size - 1)
1463 parser->buffer[parser->idx++] = ch;
1464 else {
1465 ret = -EINVAL;
1466 goto out;
1467 }
1468 ret = get_user(ch, ubuf++);
1469 if (ret)
1470 goto out;
1471 read++;
1472 cnt--;
1473 }
1474
1475 /* We either got finished input or we have to wait for another call. */
1476 if (isspace(ch) || !ch) {
1477 parser->buffer[parser->idx] = 0;
1478 parser->cont = false;
1479 } else if (parser->idx < parser->size - 1) {
1480 parser->cont = true;
1481 parser->buffer[parser->idx++] = ch;
1482 /* Make sure the parsed string always terminates with '\0'. */
1483 parser->buffer[parser->idx] = 0;
1484 } else {
1485 ret = -EINVAL;
1486 goto out;
1487 }
1488
1489 *ppos += read;
1490 ret = read;
1491
1492 out:
1493 return ret;
1494 }
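 
/*
 * Illustrative sketch (not part of this file): the typical parse loop built
 * from trace_parser_get_init(), trace_get_user() and trace_parser_put(),
 * mirroring what trace_pid_write() above does. The example_parse_words()
 * name and the 128-byte buffer size are assumptions for illustration only.
 *
 *	static ssize_t example_parse_words(const char __user *ubuf, size_t cnt)
 *	{
 *		struct trace_parser parser;
 *		ssize_t read = 0;
 *		ssize_t ret;
 *		loff_t pos;
 *
 *		if (trace_parser_get_init(&parser, 128))
 *			return -ENOMEM;
 *
 *		while (cnt > 0) {
 *			pos = 0;
 *
 *			ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *			if (ret < 0 || !trace_parser_loaded(&parser))
 *				break;
 *
 *			read += ret;
 *			ubuf += ret;
 *			cnt -= ret;
 *
 *			... handle the word in parser.buffer ...
 *
 *			trace_parser_clear(&parser);
 *		}
 *
 *		trace_parser_put(&parser);
 *		return read;
 *	}
 */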
1495
1496 /* TODO add a seq_buf_to_buffer() */
1497 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1498 {
1499 int len;
1500
1501 if (trace_seq_used(s) <= s->seq.readpos)
1502 return -EBUSY;
1503
1504 len = trace_seq_used(s) - s->seq.readpos;
1505 if (cnt > len)
1506 cnt = len;
1507 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1508
1509 s->seq.readpos += cnt;
1510 return cnt;
1511 }
1512
1513 unsigned long __read_mostly tracing_thresh;
1514
1515 #ifdef CONFIG_TRACER_MAX_TRACE
1516 /*
1517 * Copy the new maximum trace into the separate maximum-trace
1518 * structure. (this way the maximum trace is permanently saved,
1519 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1520 */
1521 static void
1522 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1523 {
1524 struct trace_buffer *trace_buf = &tr->trace_buffer;
1525 struct trace_buffer *max_buf = &tr->max_buffer;
1526 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1527 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1528
1529 max_buf->cpu = cpu;
1530 max_buf->time_start = data->preempt_timestamp;
1531
1532 max_data->saved_latency = tr->max_latency;
1533 max_data->critical_start = data->critical_start;
1534 max_data->critical_end = data->critical_end;
1535
1536 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1537 max_data->pid = tsk->pid;
1538 /*
1539 * If tsk == current, then use current_uid(), as that does not use
1540 * RCU. The irq tracer can be called out of RCU scope.
1541 */
1542 if (tsk == current)
1543 max_data->uid = current_uid();
1544 else
1545 max_data->uid = task_uid(tsk);
1546
1547 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1548 max_data->policy = tsk->policy;
1549 max_data->rt_priority = tsk->rt_priority;
1550
1551 /* record this tasks comm */
1552 tracing_record_cmdline(tsk);
1553 }
1554
1555 /**
1556 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1557 * @tr: tracer
1558 * @tsk: the task with the latency
1559 * @cpu: The cpu that initiated the trace.
1560 * @cond_data: User data associated with a conditional snapshot
1561 *
1562 * Flip the buffers between the @tr and the max_tr and record information
1563 * about which task was the cause of this latency.
1564 */
1565 void
1566 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1567 void *cond_data)
1568 {
1569 if (tr->stop_count)
1570 return;
1571
1572 WARN_ON_ONCE(!irqs_disabled());
1573
1574 if (!tr->allocated_snapshot) {
1575 /* Only the nop tracer should hit this when disabling */
1576 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1577 return;
1578 }
1579
1580 arch_spin_lock(&tr->max_lock);
1581
1582 /* Inherit the recordable setting from trace_buffer */
1583 if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1584 ring_buffer_record_on(tr->max_buffer.buffer);
1585 else
1586 ring_buffer_record_off(tr->max_buffer.buffer);
1587
1588 #ifdef CONFIG_TRACER_SNAPSHOT
1589 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1590 goto out_unlock;
1591 #endif
1592 swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1593
1594 __update_max_tr(tr, tsk, cpu);
1595
1596 out_unlock:
1597 arch_spin_unlock(&tr->max_lock);
1598 }
1599
1600 /**
1601 * update_max_tr_single - only copy one trace over, and reset the rest
1602 * @tr: tracer
1603 * @tsk: task with the latency
1604 * @cpu: the cpu of the buffer to copy.
1605 *
1606 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1607 */
1608 void
1609 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1610 {
1611 int ret;
1612
1613 if (tr->stop_count)
1614 return;
1615
1616 WARN_ON_ONCE(!irqs_disabled());
1617 if (!tr->allocated_snapshot) {
1618 /* Only the nop tracer should hit this when disabling */
1619 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1620 return;
1621 }
1622
1623 arch_spin_lock(&tr->max_lock);
1624
1625 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1626
1627 if (ret == -EBUSY) {
1628 /*
1629 * We failed to swap the buffer due to a commit taking
1630 * place on this CPU. We fail to record, but we reset
1631 * the max trace buffer (no one writes directly to it)
1632 * and flag that it failed.
1633 */
1634 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1635 "Failed to swap buffers due to commit in progress\n");
1636 }
1637
1638 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1639
1640 __update_max_tr(tr, tsk, cpu);
1641 arch_spin_unlock(&tr->max_lock);
1642 }
1643 #endif /* CONFIG_TRACER_MAX_TRACE */
1644
1645 static int wait_on_pipe(struct trace_iterator *iter, int full)
1646 {
1647 /* Iterators are static, they should be filled or empty */
1648 if (trace_buffer_iter(iter, iter->cpu_file))
1649 return 0;
1650
1651 return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1652 full);
1653 }
1654
1655 #ifdef CONFIG_FTRACE_STARTUP_TEST
1656 static bool selftests_can_run;
1657
1658 struct trace_selftests {
1659 struct list_head list;
1660 struct tracer *type;
1661 };
1662
1663 static LIST_HEAD(postponed_selftests);
1664
1665 static int save_selftest(struct tracer *type)
1666 {
1667 struct trace_selftests *selftest;
1668
1669 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1670 if (!selftest)
1671 return -ENOMEM;
1672
1673 selftest->type = type;
1674 list_add(&selftest->list, &postponed_selftests);
1675 return 0;
1676 }
1677
1678 static int run_tracer_selftest(struct tracer *type)
1679 {
1680 struct trace_array *tr = &global_trace;
1681 struct tracer *saved_tracer = tr->current_trace;
1682 int ret;
1683
1684 if (!type->selftest || tracing_selftest_disabled)
1685 return 0;
1686
1687 /*
1688 * If a tracer registers early in boot up (before scheduling is
1689 * initialized and such), then do not run its selftests yet.
1690 * Instead, run it a little later in the boot process.
1691 */
1692 if (!selftests_can_run)
1693 return save_selftest(type);
1694
1695 /*
1696 * Run a selftest on this tracer.
1697 * Here we reset the trace buffer, and set the current
1698 * tracer to be this tracer. The tracer can then run some
1699 * internal tracing to verify that everything is in order.
1700 * If we fail, we do not register this tracer.
1701 */
1702 tracing_reset_online_cpus(&tr->trace_buffer);
1703
1704 tr->current_trace = type;
1705
1706 #ifdef CONFIG_TRACER_MAX_TRACE
1707 if (type->use_max_tr) {
1708 /* If we expanded the buffers, make sure the max is expanded too */
1709 if (ring_buffer_expanded)
1710 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1711 RING_BUFFER_ALL_CPUS);
1712 tr->allocated_snapshot = true;
1713 }
1714 #endif
1715
1716 /* the test is responsible for initializing and enabling */
1717 pr_info("Testing tracer %s: ", type->name);
1718 ret = type->selftest(type, tr);
1719 /* the test is responsible for resetting too */
1720 tr->current_trace = saved_tracer;
1721 if (ret) {
1722 printk(KERN_CONT "FAILED!\n");
1723 /* Add the warning after printing 'FAILED' */
1724 WARN_ON(1);
1725 return -1;
1726 }
1727 /* Only reset on passing, to avoid touching corrupted buffers */
1728 tracing_reset_online_cpus(&tr->trace_buffer);
1729
1730 #ifdef CONFIG_TRACER_MAX_TRACE
1731 if (type->use_max_tr) {
1732 tr->allocated_snapshot = false;
1733
1734 /* Shrink the max buffer again */
1735 if (ring_buffer_expanded)
1736 ring_buffer_resize(tr->max_buffer.buffer, 1,
1737 RING_BUFFER_ALL_CPUS);
1738 }
1739 #endif
1740
1741 printk(KERN_CONT "PASSED\n");
1742 return 0;
1743 }
1744
1745 static __init int init_trace_selftests(void)
1746 {
1747 struct trace_selftests *p, *n;
1748 struct tracer *t, **last;
1749 int ret;
1750
1751 selftests_can_run = true;
1752
1753 mutex_lock(&trace_types_lock);
1754
1755 if (list_empty(&postponed_selftests))
1756 goto out;
1757
1758 pr_info("Running postponed tracer tests:\n");
1759
1760 tracing_selftest_running = true;
1761 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1762 /* This loop can take minutes when sanitizers are enabled, so
1763 * let's make sure we allow RCU processing.
1764 */
1765 cond_resched();
1766 ret = run_tracer_selftest(p->type);
1767 /* If the test fails, then warn and remove from available_tracers */
1768 if (ret < 0) {
1769 WARN(1, "tracer: %s failed selftest, disabling\n",
1770 p->type->name);
1771 last = &trace_types;
1772 for (t = trace_types; t; t = t->next) {
1773 if (t == p->type) {
1774 *last = t->next;
1775 break;
1776 }
1777 last = &t->next;
1778 }
1779 }
1780 list_del(&p->list);
1781 kfree(p);
1782 }
1783 tracing_selftest_running = false;
1784
1785 out:
1786 mutex_unlock(&trace_types_lock);
1787
1788 return 0;
1789 }
1790 core_initcall(init_trace_selftests);
1791 #else
1792 static inline int run_tracer_selftest(struct tracer *type)
1793 {
1794 return 0;
1795 }
1796 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1797
1798 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1799
1800 static void __init apply_trace_boot_options(void);
1801
1802 /**
1803 * register_tracer - register a tracer with the ftrace system.
1804 * @type: the plugin for the tracer
1805 *
1806 * Register a new plugin tracer.
1807 */
1808 int __init register_tracer(struct tracer *type)
1809 {
1810 struct tracer *t;
1811 int ret = 0;
1812
1813 if (!type->name) {
1814 pr_info("Tracer must have a name\n");
1815 return -1;
1816 }
1817
1818 if (strlen(type->name) >= MAX_TRACER_SIZE) {
1819 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1820 return -1;
1821 }
1822
1823 if (security_locked_down(LOCKDOWN_TRACEFS)) {
1824 pr_warning("Can not register tracer %s due to lockdown\n",
1825 type->name);
1826 return -EPERM;
1827 }
1828
1829 mutex_lock(&trace_types_lock);
1830
1831 tracing_selftest_running = true;
1832
1833 for (t = trace_types; t; t = t->next) {
1834 if (strcmp(type->name, t->name) == 0) {
1835 /* already found */
1836 pr_info("Tracer %s already registered\n",
1837 type->name);
1838 ret = -1;
1839 goto out;
1840 }
1841 }
1842
1843 if (!type->set_flag)
1844 type->set_flag = &dummy_set_flag;
1845 if (!type->flags) {
1846 /* allocate a dummy tracer_flags */
1847 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1848 if (!type->flags) {
1849 ret = -ENOMEM;
1850 goto out;
1851 }
1852 type->flags->val = 0;
1853 type->flags->opts = dummy_tracer_opt;
1854 } else
1855 if (!type->flags->opts)
1856 type->flags->opts = dummy_tracer_opt;
1857
1858 /* store the tracer for __set_tracer_option */
1859 type->flags->trace = type;
1860
1861 ret = run_tracer_selftest(type);
1862 if (ret < 0)
1863 goto out;
1864
1865 type->next = trace_types;
1866 trace_types = type;
1867 add_tracer_options(&global_trace, type);
1868
1869 out:
1870 tracing_selftest_running = false;
1871 mutex_unlock(&trace_types_lock);
1872
1873 if (ret || !default_bootup_tracer)
1874 goto out_unlock;
1875
1876 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1877 goto out_unlock;
1878
1879 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1880 /* Do we want this tracer to start on bootup? */
1881 tracing_set_tracer(&global_trace, type->name);
1882 default_bootup_tracer = NULL;
1883
1884 apply_trace_boot_options();
1885
1886 /* disable other selftests, since this will break it. */
1887 tracing_selftest_disabled = true;
1888 #ifdef CONFIG_FTRACE_STARTUP_TEST
1889 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1890 type->name);
1891 #endif
1892
1893 out_unlock:
1894 return ret;
1895 }
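
/*
 * Illustrative sketch (not part of the original file): an in-tree tracer
 * plugin is typically registered from its own initcall, roughly like the
 * hypothetical example below. The ->init/->reset callbacks and the
 * "example" name are assumptions for illustration only; struct tracer is
 * defined in trace.h.
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int example_tracer_register(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(example_tracer_register);
 */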
1896
1897 static void tracing_reset_cpu(struct trace_buffer *buf, int cpu)
1898 {
1899 struct ring_buffer *buffer = buf->buffer;
1900
1901 if (!buffer)
1902 return;
1903
1904 ring_buffer_record_disable(buffer);
1905
1906 /* Make sure all commits have finished */
1907 synchronize_rcu();
1908 ring_buffer_reset_cpu(buffer, cpu);
1909
1910 ring_buffer_record_enable(buffer);
1911 }
1912
1913 void tracing_reset_online_cpus(struct trace_buffer *buf)
1914 {
1915 struct ring_buffer *buffer = buf->buffer;
1916 int cpu;
1917
1918 if (!buffer)
1919 return;
1920
1921 ring_buffer_record_disable(buffer);
1922
1923 /* Make sure all commits have finished */
1924 synchronize_rcu();
1925
1926 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1927
1928 for_each_online_cpu(cpu)
1929 ring_buffer_reset_cpu(buffer, cpu);
1930
1931 ring_buffer_record_enable(buffer);
1932 }
1933
1934 /* Must have trace_types_lock held */
1935 void tracing_reset_all_online_cpus_unlocked(void)
1936 {
1937 struct trace_array *tr;
1938
1939 lockdep_assert_held(&trace_types_lock);
1940
1941 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1942 if (!tr->clear_trace)
1943 continue;
1944 tr->clear_trace = false;
1945 tracing_reset_online_cpus(&tr->trace_buffer);
1946 #ifdef CONFIG_TRACER_MAX_TRACE
1947 tracing_reset_online_cpus(&tr->max_buffer);
1948 #endif
1949 }
1950 }
1951
1952 void tracing_reset_all_online_cpus(void)
1953 {
1954 mutex_lock(&trace_types_lock);
1955 tracing_reset_all_online_cpus_unlocked();
1956 mutex_unlock(&trace_types_lock);
1957 }
1958
1959 /*
1960 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
1961 * is the tgid last observed corresponding to pid=i.
1962 */
1963 static int *tgid_map;
1964
1965 /* The maximum valid index into tgid_map. */
1966 static size_t tgid_map_max;
1967
1968 #define SAVED_CMDLINES_DEFAULT 128
1969 #define NO_CMDLINE_MAP UINT_MAX
1970 /*
1971 * Preemption must be disabled before acquiring trace_cmdline_lock.
1972 * The various trace_arrays' max_lock must be acquired in a context
1973 * where interrupt is disabled.
1974 */
1975 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1976 struct saved_cmdlines_buffer {
1977 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1978 unsigned *map_cmdline_to_pid;
1979 unsigned cmdline_num;
1980 int cmdline_idx;
1981 char saved_cmdlines[];
1982 };
1983 static struct saved_cmdlines_buffer *savedcmd;
1984
1985 static inline char *get_saved_cmdlines(int idx)
1986 {
1987 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1988 }
1989
1990 static inline void set_cmdline(int idx, const char *cmdline)
1991 {
1992 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1993 }
1994
1995 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
1996 {
1997 int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
1998
1999 kfree(s->map_cmdline_to_pid);
2000 kmemleak_free(s);
2001 free_pages((unsigned long)s, order);
2002 }
2003
2004 static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
2005 {
2006 struct saved_cmdlines_buffer *s;
2007 struct page *page;
2008 int orig_size, size;
2009 int order;
2010
2011 /* Figure out how much is needed to hold the given number of cmdlines */
2012 orig_size = sizeof(*s) + val * TASK_COMM_LEN;
2013 order = get_order(orig_size);
2014 size = 1 << (order + PAGE_SHIFT);
2015 page = alloc_pages(GFP_KERNEL, order);
2016 if (!page)
2017 return NULL;
2018
2019 s = page_address(page);
2020 kmemleak_alloc(s, size, 1, GFP_KERNEL);
2021 memset(s, 0, sizeof(*s));
2022
2023 /* Round up to actual allocation */
2024 val = (size - sizeof(*s)) / TASK_COMM_LEN;
2025 s->cmdline_num = val;
2026
2027 s->map_cmdline_to_pid = kmalloc_array(val,
2028 sizeof(*s->map_cmdline_to_pid),
2029 GFP_KERNEL);
2030 if (!s->map_cmdline_to_pid) {
2031 free_saved_cmdlines_buffer(s);
2032 return NULL;
2033 }
2034
2035 s->cmdline_idx = 0;
2036 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2037 sizeof(s->map_pid_to_cmdline));
2038 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2039 val * sizeof(*s->map_cmdline_to_pid));
2040
2041 return s;
2042 }
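
/*
 * Worked sizing sketch (approximate, assuming 4 KiB pages and
 * TASK_COMM_LEN == 16): map_pid_to_cmdline[] alone takes
 * (PID_MAX_DEFAULT + 1) * sizeof(unsigned), roughly 128 KiB, so even the
 * default request of SAVED_CMDLINES_DEFAULT (128) comms becomes an
 * order-6 page allocation. The "round up to actual allocation" step then
 * grows cmdline_num to use whatever the allocator actually handed back:
 *
 *	val = (size - sizeof(*s)) / TASK_COMM_LEN;
 *
 * which yields several thousand saved comms instead of 128.
 */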
2043
2044 static int trace_create_savedcmd(void)
2045 {
2046 savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
2047
2048 return savedcmd ? 0 : -ENOMEM;
2049 }
2050
2051 int is_tracing_stopped(void)
2052 {
2053 return global_trace.stop_count;
2054 }
2055
2056 /**
2057 * tracing_start - quick start of the tracer
2058 *
2059 * If tracing is enabled but was stopped by tracing_stop,
2060 * this will start the tracer back up.
2061 */
2062 void tracing_start(void)
2063 {
2064 struct ring_buffer *buffer;
2065 unsigned long flags;
2066
2067 if (tracing_disabled)
2068 return;
2069
2070 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2071 if (--global_trace.stop_count) {
2072 if (global_trace.stop_count < 0) {
2073 /* Someone screwed up their debugging */
2074 WARN_ON_ONCE(1);
2075 global_trace.stop_count = 0;
2076 }
2077 goto out;
2078 }
2079
2080 /* Prevent the buffers from switching */
2081 arch_spin_lock(&global_trace.max_lock);
2082
2083 buffer = global_trace.trace_buffer.buffer;
2084 if (buffer)
2085 ring_buffer_record_enable(buffer);
2086
2087 #ifdef CONFIG_TRACER_MAX_TRACE
2088 buffer = global_trace.max_buffer.buffer;
2089 if (buffer)
2090 ring_buffer_record_enable(buffer);
2091 #endif
2092
2093 arch_spin_unlock(&global_trace.max_lock);
2094
2095 out:
2096 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2097 }
2098
2099 static void tracing_start_tr(struct trace_array *tr)
2100 {
2101 struct ring_buffer *buffer;
2102 unsigned long flags;
2103
2104 if (tracing_disabled)
2105 return;
2106
2107 /* If global, we need to also start the max tracer */
2108 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2109 return tracing_start();
2110
2111 raw_spin_lock_irqsave(&tr->start_lock, flags);
2112
2113 if (--tr->stop_count) {
2114 if (tr->stop_count < 0) {
2115 /* Someone screwed up their debugging */
2116 WARN_ON_ONCE(1);
2117 tr->stop_count = 0;
2118 }
2119 goto out;
2120 }
2121
2122 buffer = tr->trace_buffer.buffer;
2123 if (buffer)
2124 ring_buffer_record_enable(buffer);
2125
2126 out:
2127 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2128 }
2129
2130 /**
2131 * tracing_stop - quick stop of the tracer
2132 *
2133 * Light weight way to stop tracing. Use in conjunction with
2134 * tracing_start.
2135 */
2136 void tracing_stop(void)
2137 {
2138 struct ring_buffer *buffer;
2139 unsigned long flags;
2140
2141 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2142 if (global_trace.stop_count++)
2143 goto out;
2144
2145 /* Prevent the buffers from switching */
2146 arch_spin_lock(&global_trace.max_lock);
2147
2148 buffer = global_trace.trace_buffer.buffer;
2149 if (buffer)
2150 ring_buffer_record_disable(buffer);
2151
2152 #ifdef CONFIG_TRACER_MAX_TRACE
2153 buffer = global_trace.max_buffer.buffer;
2154 if (buffer)
2155 ring_buffer_record_disable(buffer);
2156 #endif
2157
2158 arch_spin_unlock(&global_trace.max_lock);
2159
2160 out:
2161 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2162 }
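
/*
 * Usage sketch (illustrative): tracing_stop() and tracing_start() nest by
 * way of stop_count, so code that wants a quiet window in the global
 * buffer simply brackets the region of interest:
 *
 *	tracing_stop();
 *	... inspect or copy out the trace buffer ...
 *	tracing_start();
 *
 * Nested callers are fine; recording resumes only when the outermost
 * tracing_start() brings stop_count back to zero.
 */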
2163
2164 static void tracing_stop_tr(struct trace_array *tr)
2165 {
2166 struct ring_buffer *buffer;
2167 unsigned long flags;
2168
2169 /* If global, we need to also stop the max tracer */
2170 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2171 return tracing_stop();
2172
2173 raw_spin_lock_irqsave(&tr->start_lock, flags);
2174 if (tr->stop_count++)
2175 goto out;
2176
2177 buffer = tr->trace_buffer.buffer;
2178 if (buffer)
2179 ring_buffer_record_disable(buffer);
2180
2181 out:
2182 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2183 }
2184
2185 static int trace_save_cmdline(struct task_struct *tsk)
2186 {
2187 unsigned tpid, idx;
2188
2189 /* treat recording of idle task as a success */
2190 if (!tsk->pid)
2191 return 1;
2192
2193 tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2194
2195 /*
2196 * It's not the end of the world if we don't get
2197 * the lock, but we also don't want to spin
2198 * nor do we want to disable interrupts,
2199 * so if we miss here, then better luck next time.
2200 *
2201 * This is called within the scheduler and wakeup paths, so interrupts
2202 * had better be disabled and the run queue lock had better be held.
2203 */
2204 if (!arch_spin_trylock(&trace_cmdline_lock))
2205 return 0;
2206
2207 idx = savedcmd->map_pid_to_cmdline[tpid];
2208 if (idx == NO_CMDLINE_MAP) {
2209 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2210
2211 savedcmd->map_pid_to_cmdline[tpid] = idx;
2212 savedcmd->cmdline_idx = idx;
2213 }
2214
2215 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2216 set_cmdline(idx, tsk->comm);
2217
2218 arch_spin_unlock(&trace_cmdline_lock);
2219
2220 return 1;
2221 }
2222
2223 static void __trace_find_cmdline(int pid, char comm[])
2224 {
2225 unsigned map;
2226 int tpid;
2227
2228 if (!pid) {
2229 strcpy(comm, "<idle>");
2230 return;
2231 }
2232
2233 if (WARN_ON_ONCE(pid < 0)) {
2234 strcpy(comm, "<XXX>");
2235 return;
2236 }
2237
2238 tpid = pid & (PID_MAX_DEFAULT - 1);
2239 map = savedcmd->map_pid_to_cmdline[tpid];
2240 if (map != NO_CMDLINE_MAP) {
2241 tpid = savedcmd->map_cmdline_to_pid[map];
2242 if (tpid == pid) {
2243 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2244 return;
2245 }
2246 }
2247 strcpy(comm, "<...>");
2248 }
2249
2250 void trace_find_cmdline(int pid, char comm[])
2251 {
2252 preempt_disable();
2253 arch_spin_lock(&trace_cmdline_lock);
2254
2255 __trace_find_cmdline(pid, comm);
2256
2257 arch_spin_unlock(&trace_cmdline_lock);
2258 preempt_enable();
2259 }
2260
2261 static int *trace_find_tgid_ptr(int pid)
2262 {
2263 /*
2264 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2265 * if we observe a non-NULL tgid_map then we also observe the correct
2266 * tgid_map_max.
2267 */
2268 int *map = smp_load_acquire(&tgid_map);
2269
2270 if (unlikely(!map || pid > tgid_map_max))
2271 return NULL;
2272
2273 return &map[pid];
2274 }
2275
2276 int trace_find_tgid(int pid)
2277 {
2278 int *ptr = trace_find_tgid_ptr(pid);
2279
2280 return ptr ? *ptr : 0;
2281 }
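
/*
 * Usage sketch (illustrative): output code resolves a recorded pid back to
 * a comm and a tgid along these lines; "entry" stands for any recorded
 * struct trace_entry, comm must provide at least TASK_COMM_LEN bytes, and
 * a tgid of 0 means "never recorded":
 *
 *	char comm[TASK_COMM_LEN];
 *	int tgid;
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	tgid = trace_find_tgid(entry->pid);
 */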
2282
2283 static int trace_save_tgid(struct task_struct *tsk)
2284 {
2285 int *ptr;
2286
2287 /* treat recording of idle task as a success */
2288 if (!tsk->pid)
2289 return 1;
2290
2291 ptr = trace_find_tgid_ptr(tsk->pid);
2292 if (!ptr)
2293 return 0;
2294
2295 *ptr = tsk->tgid;
2296 return 1;
2297 }
2298
2299 static bool tracing_record_taskinfo_skip(int flags)
2300 {
2301 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2302 return true;
2303 if (!__this_cpu_read(trace_taskinfo_save))
2304 return true;
2305 return false;
2306 }
2307
2308 /**
2309 * tracing_record_taskinfo - record the task info of a task
2310 *
2311 * @task: task to record
2312 * @flags: TRACE_RECORD_CMDLINE for recording comm
2313 * TRACE_RECORD_TGID for recording tgid
2314 */
2315 void tracing_record_taskinfo(struct task_struct *task, int flags)
2316 {
2317 bool done;
2318
2319 if (tracing_record_taskinfo_skip(flags))
2320 return;
2321
2322 /*
2323 * Record as much task information as possible. If some fail, continue
2324 * to try to record the others.
2325 */
2326 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2327 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2328
2329 /* If recording any information failed, retry again soon. */
2330 if (!done)
2331 return;
2332
2333 __this_cpu_write(trace_taskinfo_save, false);
2334 }
2335
2336 /**
2337 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2338 *
2339 * @prev: previous task during sched_switch
2340 * @next: next task during sched_switch
2341 * @flags: TRACE_RECORD_CMDLINE for recording comm
2342 * TRACE_RECORD_TGID for recording tgid
2343 */
2344 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2345 struct task_struct *next, int flags)
2346 {
2347 bool done;
2348
2349 if (tracing_record_taskinfo_skip(flags))
2350 return;
2351
2352 /*
2353 * Record as much task information as possible. If some fail, continue
2354 * to try to record the others.
2355 */
2356 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2357 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2358 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2359 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2360
2361 /* If recording any information failed, retry again soon. */
2362 if (!done)
2363 return;
2364
2365 __this_cpu_write(trace_taskinfo_save, false);
2366 }
2367
2368 /* Helpers to record a specific task information */
2369 void tracing_record_cmdline(struct task_struct *task)
2370 {
2371 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2372 }
2373
2374 void tracing_record_tgid(struct task_struct *task)
2375 {
2376 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2377 }
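
/*
 * Usage sketch (illustrative): the sched_switch hook records both tasks in
 * one call, roughly as below; the exact flag computation in the scheduler
 * tracepoint glue may differ.
 *
 *	tracing_record_taskinfo_sched_switch(prev, next,
 *				TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID);
 */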
2378
2379 /*
2380 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2381 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2382 * simplifies those functions and keeps them in sync.
2383 */
2384 enum print_line_t trace_handle_return(struct trace_seq *s)
2385 {
2386 return trace_seq_has_overflowed(s) ?
2387 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2388 }
2389 EXPORT_SYMBOL_GPL(trace_handle_return);
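
/*
 * Usage sketch (illustrative): an event's output handler typically ends
 * with
 *
 *	trace_seq_printf(s, "%s: %lu\n", name, value);
 *	return trace_handle_return(s);
 *
 * so that an overflowed trace_seq is reported as TRACE_TYPE_PARTIAL_LINE
 * rather than being silently truncated. "name" and "value" above are
 * placeholders.
 */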
2390
2391 void
2392 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2393 unsigned long flags, int pc)
2394 {
2395 struct task_struct *tsk = current;
2396
2397 entry->preempt_count = pc & 0xff;
2398 entry->pid = (tsk) ? tsk->pid : 0;
2399 entry->type = type;
2400 entry->flags =
2401 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2402 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2403 #else
2404 TRACE_FLAG_IRQS_NOSUPPORT |
2405 #endif
2406 ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
2407 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2408 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2409 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2410 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2411 }
2412 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
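
/*
 * Usage sketch (illustrative): reservation paths stamp the common header of
 * a freshly reserved entry with the caller's irq flags and preempt count.
 * "entry" below stands for any event structure whose first member is a
 * struct trace_entry (the ->ent field name is an assumption):
 *
 *	unsigned long irq_flags;
 *	int pc = preempt_count();
 *
 *	local_save_flags(irq_flags);
 *	tracing_generic_entry_update(&entry->ent, TRACE_FN, irq_flags, pc);
 */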
2413
2414 struct ring_buffer_event *
2415 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2416 int type,
2417 unsigned long len,
2418 unsigned long flags, int pc)
2419 {
2420 return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2421 }
2422
2423 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2424 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2425 static int trace_buffered_event_ref;
2426
2427 /**
2428 * trace_buffered_event_enable - enable buffering events
2429 *
2430 * When events are being filtered, it is quicker to use a temporary
2431 * buffer to write the event data into if there's a likely chance
2432 * that it will not be committed. The discard of the ring buffer
2433 * is not as fast as committing, and is much slower than copying
2434 * a commit.
2435 *
2436 * When an event is to be filtered, allocate per-CPU buffers to
2437 * write the event data into; if the event is filtered and discarded,
2438 * it is simply dropped, otherwise the entire data is committed
2439 * in one shot.
2440 */
2441 void trace_buffered_event_enable(void)
2442 {
2443 struct ring_buffer_event *event;
2444 struct page *page;
2445 int cpu;
2446
2447 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2448
2449 if (trace_buffered_event_ref++)
2450 return;
2451
2452 for_each_tracing_cpu(cpu) {
2453 page = alloc_pages_node(cpu_to_node(cpu),
2454 GFP_KERNEL | __GFP_NORETRY, 0);
2455 /* This is just an optimization and can handle failures */
2456 if (!page) {
2457 pr_err("Failed to allocate event buffer\n");
2458 break;
2459 }
2460
2461 event = page_address(page);
2462 memset(event, 0, sizeof(*event));
2463
2464 per_cpu(trace_buffered_event, cpu) = event;
2465
2466 preempt_disable();
2467 if (cpu == smp_processor_id() &&
2468 this_cpu_read(trace_buffered_event) !=
2469 per_cpu(trace_buffered_event, cpu))
2470 WARN_ON_ONCE(1);
2471 preempt_enable();
2472 }
2473 }
2474
2475 static void enable_trace_buffered_event(void *data)
2476 {
2477 /* Probably not needed, but do it anyway */
2478 smp_rmb();
2479 this_cpu_dec(trace_buffered_event_cnt);
2480 }
2481
2482 static void disable_trace_buffered_event(void *data)
2483 {
2484 this_cpu_inc(trace_buffered_event_cnt);
2485 }
2486
2487 /**
2488 * trace_buffered_event_disable - disable buffering events
2489 *
2490 * When a filter is removed, it is faster to not use the buffered
2491 * events, and to commit directly into the ring buffer. Free up
2492 * the temp buffers when there are no more users. This requires
2493 * special synchronization with current events.
2494 */
2495 void trace_buffered_event_disable(void)
2496 {
2497 int cpu;
2498
2499 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2500
2501 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2502 return;
2503
2504 if (--trace_buffered_event_ref)
2505 return;
2506
2507 /* For each CPU, set the buffer as used. */
2508 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2509 NULL, true);
2510
2511 /* Wait for all current users to finish */
2512 synchronize_rcu();
2513
2514 for_each_tracing_cpu(cpu) {
2515 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2516 per_cpu(trace_buffered_event, cpu) = NULL;
2517 }
2518
2519 /*
2520 * Wait for all CPUs that potentially started checking if they can use
2521 * their event buffer only after the previous synchronize_rcu() call and
2522 * they still read a valid pointer from trace_buffered_event. It must be
2523 * ensured they don't see cleared trace_buffered_event_cnt else they
2524 * could wrongly decide to use the pointed-to buffer which is now freed.
2525 */
2526 synchronize_rcu();
2527
2528 /* For each CPU, relinquish the buffer */
2529 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2530 true);
2531 }
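
/*
 * Usage sketch (illustrative): both helpers expect event_mutex to be held,
 * so filter setup and teardown pair them like this:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	... install the per-event filter ...
 *	mutex_unlock(&event_mutex);
 *
 * and, symmetrically, trace_buffered_event_disable() is called (again under
 * event_mutex) once the last filter goes away.
 */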
2532
2533 static struct ring_buffer *temp_buffer;
2534
2535 struct ring_buffer_event *
2536 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2537 struct trace_event_file *trace_file,
2538 int type, unsigned long len,
2539 unsigned long flags, int pc)
2540 {
2541 struct ring_buffer_event *entry;
2542 int val;
2543
2544 *current_rb = trace_file->tr->trace_buffer.buffer;
2545
2546 if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2547 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2548 (entry = this_cpu_read(trace_buffered_event))) {
2549 /* Try to use the per cpu buffer first */
2550 val = this_cpu_inc_return(trace_buffered_event_cnt);
2551 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2552 trace_event_setup(entry, type, flags, pc);
2553 entry->array[0] = len;
2554 return entry;
2555 }
2556 this_cpu_dec(trace_buffered_event_cnt);
2557 }
2558
2559 entry = __trace_buffer_lock_reserve(*current_rb,
2560 type, len, flags, pc);
2561 /*
2562 * If tracing is off, but we have triggers enabled
2563 * we still need to look at the event data. Use the temp_buffer
2564 * to store the trace event for the trigger to use. It's recursive
2565 * safe and will not be recorded anywhere.
2566 */
2567 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2568 *current_rb = temp_buffer;
2569 entry = __trace_buffer_lock_reserve(*current_rb,
2570 type, len, flags, pc);
2571 }
2572 return entry;
2573 }
2574 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2575
2576 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2577 static DEFINE_MUTEX(tracepoint_printk_mutex);
2578
2579 static void output_printk(struct trace_event_buffer *fbuffer)
2580 {
2581 struct trace_event_call *event_call;
2582 struct trace_event *event;
2583 unsigned long flags;
2584 struct trace_iterator *iter = tracepoint_print_iter;
2585
2586 /* We should never get here if iter is NULL */
2587 if (WARN_ON_ONCE(!iter))
2588 return;
2589
2590 event_call = fbuffer->trace_file->event_call;
2591 if (!event_call || !event_call->event.funcs ||
2592 !event_call->event.funcs->trace)
2593 return;
2594
2595 event = &fbuffer->trace_file->event_call->event;
2596
2597 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2598 trace_seq_init(&iter->seq);
2599 iter->ent = fbuffer->entry;
2600 event_call->event.funcs->trace(iter, 0, event);
2601 trace_seq_putc(&iter->seq, 0);
2602 printk("%s", iter->seq.buffer);
2603
2604 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2605 }
2606
2607 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2608 void __user *buffer, size_t *lenp,
2609 loff_t *ppos)
2610 {
2611 int save_tracepoint_printk;
2612 int ret;
2613
2614 mutex_lock(&tracepoint_printk_mutex);
2615 save_tracepoint_printk = tracepoint_printk;
2616
2617 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2618
2619 /*
2620 * This will force exiting early, as tracepoint_printk
2621 * is always zero when tracepoint_print_iter is not allocated
2622 */
2623 if (!tracepoint_print_iter)
2624 tracepoint_printk = 0;
2625
2626 if (save_tracepoint_printk == tracepoint_printk)
2627 goto out;
2628
2629 if (tracepoint_printk)
2630 static_key_enable(&tracepoint_printk_key.key);
2631 else
2632 static_key_disable(&tracepoint_printk_key.key);
2633
2634 out:
2635 mutex_unlock(&tracepoint_printk_mutex);
2636
2637 return ret;
2638 }
2639
2640 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2641 {
2642 if (static_key_false(&tracepoint_printk_key.key))
2643 output_printk(fbuffer);
2644
2645 event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2646 fbuffer->event, fbuffer->entry,
2647 fbuffer->flags, fbuffer->pc);
2648 }
2649 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2650
2651 /*
2652 * Skip 3:
2653 *
2654 * trace_buffer_unlock_commit_regs()
2655 * trace_event_buffer_commit()
2656 * trace_event_raw_event_xxx()
2657 */
2658 # define STACK_SKIP 3
2659
2660 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2661 struct ring_buffer *buffer,
2662 struct ring_buffer_event *event,
2663 unsigned long flags, int pc,
2664 struct pt_regs *regs)
2665 {
2666 __buffer_unlock_commit(buffer, event);
2667
2668 /*
2669 * If regs is not set, then skip the necessary functions.
2670 * Note, we can still get here via blktrace, wakeup tracer
2671 * and mmiotrace, but that's ok if they lose a function or
2672 * two. They are not that meaningful.
2673 */
2674 ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2675 ftrace_trace_userstack(tr, buffer, flags, pc);
2676 }
2677
2678 /*
2679 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2680 */
2681 void
2682 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2683 struct ring_buffer_event *event)
2684 {
2685 __buffer_unlock_commit(buffer, event);
2686 }
2687
2688 static void
2689 trace_process_export(struct trace_export *export,
2690 struct ring_buffer_event *event)
2691 {
2692 struct trace_entry *entry;
2693 unsigned int size = 0;
2694
2695 entry = ring_buffer_event_data(event);
2696 size = ring_buffer_event_length(event);
2697 export->write(export, entry, size);
2698 }
2699
2700 static DEFINE_MUTEX(ftrace_export_lock);
2701
2702 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2703
2704 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2705
2706 static inline void ftrace_exports_enable(void)
2707 {
2708 static_branch_enable(&ftrace_exports_enabled);
2709 }
2710
2711 static inline void ftrace_exports_disable(void)
2712 {
2713 static_branch_disable(&ftrace_exports_enabled);
2714 }
2715
2716 static void ftrace_exports(struct ring_buffer_event *event)
2717 {
2718 struct trace_export *export;
2719
2720 preempt_disable_notrace();
2721
2722 export = rcu_dereference_raw_check(ftrace_exports_list);
2723 while (export) {
2724 trace_process_export(export, event);
2725 export = rcu_dereference_raw_check(export->next);
2726 }
2727
2728 preempt_enable_notrace();
2729 }
2730
2731 static inline void
2732 add_trace_export(struct trace_export **list, struct trace_export *export)
2733 {
2734 rcu_assign_pointer(export->next, *list);
2735 /*
2736 * We are entering export into the list but another
2737 * CPU might be walking that list. We need to make sure
2738 * the export->next pointer is valid before another CPU sees
2739 * the export pointer included into the list.
2740 */
2741 rcu_assign_pointer(*list, export);
2742 }
2743
2744 static inline int
2745 rm_trace_export(struct trace_export **list, struct trace_export *export)
2746 {
2747 struct trace_export **p;
2748
2749 for (p = list; *p != NULL; p = &(*p)->next)
2750 if (*p == export)
2751 break;
2752
2753 if (*p != export)
2754 return -1;
2755
2756 rcu_assign_pointer(*p, (*p)->next);
2757
2758 return 0;
2759 }
2760
2761 static inline void
2762 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2763 {
2764 if (*list == NULL)
2765 ftrace_exports_enable();
2766
2767 add_trace_export(list, export);
2768 }
2769
2770 static inline int
2771 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2772 {
2773 int ret;
2774
2775 ret = rm_trace_export(list, export);
2776 if (*list == NULL)
2777 ftrace_exports_disable();
2778
2779 return ret;
2780 }
2781
2782 int register_ftrace_export(struct trace_export *export)
2783 {
2784 if (WARN_ON_ONCE(!export->write))
2785 return -1;
2786
2787 mutex_lock(&ftrace_export_lock);
2788
2789 add_ftrace_export(&ftrace_exports_list, export);
2790
2791 mutex_unlock(&ftrace_export_lock);
2792
2793 return 0;
2794 }
2795 EXPORT_SYMBOL_GPL(register_ftrace_export);
2796
2797 int unregister_ftrace_export(struct trace_export *export)
2798 {
2799 int ret;
2800
2801 mutex_lock(&ftrace_export_lock);
2802
2803 ret = rm_ftrace_export(&ftrace_exports_list, export);
2804
2805 mutex_unlock(&ftrace_export_lock);
2806
2807 return ret;
2808 }
2809 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
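
/*
 * Usage sketch (illustrative): a consumer that wants a copy of every
 * function-trace entry registers a struct trace_export with a write()
 * callback. The callback prototype below is an assumption for
 * illustration; the authoritative definition lives in linux/trace.h.
 *
 *	static void example_export_write(struct trace_export *export,
 *					 const void *entry, unsigned int size)
 *	{
 *		... forward entry/size to an external sink ...
 *	}
 *
 *	static struct trace_export example_export = {
 *		.write	= example_export_write,
 *	};
 *
 *	register_ftrace_export(&example_export);
 *	...
 *	unregister_ftrace_export(&example_export);
 */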
2810
2811 void
2812 trace_function(struct trace_array *tr,
2813 unsigned long ip, unsigned long parent_ip, unsigned long flags,
2814 int pc)
2815 {
2816 struct trace_event_call *call = &event_function;
2817 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2818 struct ring_buffer_event *event;
2819 struct ftrace_entry *entry;
2820
2821 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2822 flags, pc);
2823 if (!event)
2824 return;
2825 entry = ring_buffer_event_data(event);
2826 entry->ip = ip;
2827 entry->parent_ip = parent_ip;
2828
2829 if (!call_filter_check_discard(call, entry, buffer, event)) {
2830 if (static_branch_unlikely(&ftrace_exports_enabled))
2831 ftrace_exports(event);
2832 __buffer_unlock_commit(buffer, event);
2833 }
2834 }
2835
2836 #ifdef CONFIG_STACKTRACE
2837
2838 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2839 #define FTRACE_KSTACK_NESTING 4
2840
2841 #define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2842
2843 struct ftrace_stack {
2844 unsigned long calls[FTRACE_KSTACK_ENTRIES];
2845 };
2846
2847
2848 struct ftrace_stacks {
2849 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
2850 };
2851
2852 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2853 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2854
2855 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2856 unsigned long flags,
2857 int skip, int pc, struct pt_regs *regs)
2858 {
2859 struct trace_event_call *call = &event_kernel_stack;
2860 struct ring_buffer_event *event;
2861 unsigned int size, nr_entries;
2862 struct ftrace_stack *fstack;
2863 struct stack_entry *entry;
2864 int stackidx;
2865
2866 /*
2867 * Add one, for this function and the call to save_stack_trace().
2868 * If regs is set, then these functions will not be in the way.
2869 */
2870 #ifndef CONFIG_UNWINDER_ORC
2871 if (!regs)
2872 skip++;
2873 #endif
2874
2875 /*
2876 * Since events can happen in NMIs there's no safe way to
2877 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2878 * or NMI comes in, it will just have to use the default
2879 * FTRACE_STACK_SIZE.
2880 */
2881 preempt_disable_notrace();
2882
2883 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2884
2885 /* This should never happen. If it does, yell once and skip */
2886 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2887 goto out;
2888
2889 /*
2890 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2891 * interrupt will either see the value pre increment or post
2892 * increment. If the interrupt happens pre increment it will have
2893 * restored the counter when it returns. We just need a barrier to
2894 * keep gcc from moving things around.
2895 */
2896 barrier();
2897
2898 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2899 size = ARRAY_SIZE(fstack->calls);
2900
2901 if (regs) {
2902 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2903 size, skip);
2904 } else {
2905 nr_entries = stack_trace_save(fstack->calls, size, skip);
2906 }
2907
2908 size = nr_entries * sizeof(unsigned long);
2909 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2910 (sizeof(*entry) - sizeof(entry->caller)) + size,
2911 flags, pc);
2912 if (!event)
2913 goto out;
2914 entry = ring_buffer_event_data(event);
2915
2916 memcpy(&entry->caller, fstack->calls, size);
2917 entry->size = nr_entries;
2918
2919 if (!call_filter_check_discard(call, entry, buffer, event))
2920 __buffer_unlock_commit(buffer, event);
2921
2922 out:
2923 /* Again, don't let gcc optimize things here */
2924 barrier();
2925 __this_cpu_dec(ftrace_stack_reserve);
2926 preempt_enable_notrace();
2927
2928 }
2929
2930 static inline void ftrace_trace_stack(struct trace_array *tr,
2931 struct ring_buffer *buffer,
2932 unsigned long flags,
2933 int skip, int pc, struct pt_regs *regs)
2934 {
2935 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2936 return;
2937
2938 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2939 }
2940
2941 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2942 int pc)
2943 {
2944 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2945
2946 if (rcu_is_watching()) {
2947 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2948 return;
2949 }
2950
2951 /*
2952 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2953 * but if the above rcu_is_watching() failed, then the NMI
2954 * triggered someplace critical, and rcu_irq_enter() should
2955 * not be called from NMI.
2956 */
2957 if (unlikely(in_nmi()))
2958 return;
2959
2960 rcu_irq_enter_irqson();
2961 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2962 rcu_irq_exit_irqson();
2963 }
2964
2965 /**
2966 * trace_dump_stack - record a stack back trace in the trace buffer
2967 * @skip: Number of functions to skip (helper handlers)
2968 */
2969 void trace_dump_stack(int skip)
2970 {
2971 unsigned long flags;
2972
2973 if (tracing_disabled || tracing_selftest_running)
2974 return;
2975
2976 local_save_flags(flags);
2977
2978 #ifndef CONFIG_UNWINDER_ORC
2979 /* Skip 1 to skip this function. */
2980 skip++;
2981 #endif
2982 __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2983 flags, skip, preempt_count(), NULL);
2984 }
2985 EXPORT_SYMBOL_GPL(trace_dump_stack);
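
/*
 * Usage sketch (illustrative): a driver chasing a rare code path can drop a
 * kernel stack trace into the ring buffer instead of spamming the console;
 * "suspicious_condition" is a placeholder.
 *
 *	if (unlikely(suspicious_condition))
 *		trace_dump_stack(0);
 */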
2986
2987 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
2988 static DEFINE_PER_CPU(int, user_stack_count);
2989
2990 static void
2991 ftrace_trace_userstack(struct trace_array *tr,
2992 struct ring_buffer *buffer, unsigned long flags, int pc)
2993 {
2994 struct trace_event_call *call = &event_user_stack;
2995 struct ring_buffer_event *event;
2996 struct userstack_entry *entry;
2997
2998 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
2999 return;
3000
3001 /*
3002 * NMIs cannot handle page faults, even with fix ups.
3003 * Saving the user stack can (and often does) fault.
3004 */
3005 if (unlikely(in_nmi()))
3006 return;
3007
3008 /*
3009 * prevent recursion, since the user stack tracing may
3010 * trigger other kernel events.
3011 */
3012 preempt_disable();
3013 if (__this_cpu_read(user_stack_count))
3014 goto out;
3015
3016 __this_cpu_inc(user_stack_count);
3017
3018 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3019 sizeof(*entry), flags, pc);
3020 if (!event)
3021 goto out_drop_count;
3022 entry = ring_buffer_event_data(event);
3023
3024 entry->tgid = current->tgid;
3025 memset(&entry->caller, 0, sizeof(entry->caller));
3026
3027 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3028 if (!call_filter_check_discard(call, entry, buffer, event))
3029 __buffer_unlock_commit(buffer, event);
3030
3031 out_drop_count:
3032 __this_cpu_dec(user_stack_count);
3033 out:
3034 preempt_enable();
3035 }
3036 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3037 static void ftrace_trace_userstack(struct trace_array *tr,
3038 struct ring_buffer *buffer,
3039 unsigned long flags, int pc)
3040 {
3041 }
3042 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3043
3044 #endif /* CONFIG_STACKTRACE */
3045
3046 /* created for use with alloc_percpu */
3047 struct trace_buffer_struct {
3048 int nesting;
3049 char buffer[4][TRACE_BUF_SIZE];
3050 };
3051
3052 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3053
3054 /*
3055 * This allows for lockless recording. If we're nested too deeply, then
3056 * this returns NULL.
3057 */
3058 static char *get_trace_buf(void)
3059 {
3060 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3061
3062 if (!trace_percpu_buffer || buffer->nesting >= 4)
3063 return NULL;
3064
3065 buffer->nesting++;
3066
3067 /* Interrupts must see nesting incremented before we use the buffer */
3068 barrier();
3069 return &buffer->buffer[buffer->nesting - 1][0];
3070 }
3071
3072 static void put_trace_buf(void)
3073 {
3074 /* Don't let the decrement of nesting leak before this */
3075 barrier();
3076 this_cpu_dec(trace_percpu_buffer->nesting);
3077 }
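
/*
 * Usage sketch (illustrative): trace_vbprintk() and __trace_array_vprintk()
 * below follow this pattern, which is interrupt-safe because each nesting
 * level (normal, softirq, irq, NMI) gets its own of the four per-CPU
 * buffers:
 *
 *	preempt_disable_notrace();
 *	tbuffer = get_trace_buf();
 *	if (tbuffer) {
 *		... format up to TRACE_BUF_SIZE bytes into tbuffer ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */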
3078
3079 static int alloc_percpu_trace_buffer(void)
3080 {
3081 struct trace_buffer_struct __percpu *buffers;
3082
3083 buffers = alloc_percpu(struct trace_buffer_struct);
3084 if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3085 return -ENOMEM;
3086
3087 trace_percpu_buffer = buffers;
3088 return 0;
3089 }
3090
3091 static int buffers_allocated;
3092
3093 void trace_printk_init_buffers(void)
3094 {
3095 if (buffers_allocated)
3096 return;
3097
3098 if (alloc_percpu_trace_buffer())
3099 return;
3100
3101 /* trace_printk() is for debug use only. Don't use it in production. */
3102
3103 pr_warn("\n");
3104 pr_warn("**********************************************************\n");
3105 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3106 pr_warn("** **\n");
3107 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3108 pr_warn("** **\n");
3109 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3110 pr_warn("** unsafe for production use. **\n");
3111 pr_warn("** **\n");
3112 pr_warn("** If you see this message and you are not debugging **\n");
3113 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3114 pr_warn("** **\n");
3115 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3116 pr_warn("**********************************************************\n");
3117
3118 /* Expand the buffers to set size */
3119 tracing_update_buffers();
3120
3121 buffers_allocated = 1;
3122
3123 /*
3124 * trace_printk_init_buffers() can be called by modules.
3125 * If that happens, then we need to start cmdline recording
3126 * directly here. If the global_trace.buffer is already
3127 * allocated here, then this was called by module code.
3128 */
3129 if (global_trace.trace_buffer.buffer)
3130 tracing_start_cmdline_record();
3131 }
3132 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3133
3134 void trace_printk_start_comm(void)
3135 {
3136 /* Start tracing comms if trace printk is set */
3137 if (!buffers_allocated)
3138 return;
3139 tracing_start_cmdline_record();
3140 }
3141
3142 static void trace_printk_start_stop_comm(int enabled)
3143 {
3144 if (!buffers_allocated)
3145 return;
3146
3147 if (enabled)
3148 tracing_start_cmdline_record();
3149 else
3150 tracing_stop_cmdline_record();
3151 }
3152
3153 /**
3154 * trace_vbprintk - write binary msg to tracing buffer
3155 * @ip: The address of the caller
3156 * @fmt: The string format to write to the buffer
3157 * @args: Arguments for @fmt
3158 */
3159 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3160 {
3161 struct trace_event_call *call = &event_bprint;
3162 struct ring_buffer_event *event;
3163 struct ring_buffer *buffer;
3164 struct trace_array *tr = &global_trace;
3165 struct bprint_entry *entry;
3166 unsigned long flags;
3167 char *tbuffer;
3168 int len = 0, size, pc;
3169
3170 if (unlikely(tracing_selftest_running || tracing_disabled))
3171 return 0;
3172
3173 /* Don't pollute graph traces with trace_vprintk internals */
3174 pause_graph_tracing();
3175
3176 pc = preempt_count();
3177 preempt_disable_notrace();
3178
3179 tbuffer = get_trace_buf();
3180 if (!tbuffer) {
3181 len = 0;
3182 goto out_nobuffer;
3183 }
3184
3185 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3186
3187 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3188 goto out;
3189
3190 local_save_flags(flags);
3191 size = sizeof(*entry) + sizeof(u32) * len;
3192 buffer = tr->trace_buffer.buffer;
3193 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3194 flags, pc);
3195 if (!event)
3196 goto out;
3197 entry = ring_buffer_event_data(event);
3198 entry->ip = ip;
3199 entry->fmt = fmt;
3200
3201 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3202 if (!call_filter_check_discard(call, entry, buffer, event)) {
3203 __buffer_unlock_commit(buffer, event);
3204 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3205 }
3206
3207 out:
3208 put_trace_buf();
3209
3210 out_nobuffer:
3211 preempt_enable_notrace();
3212 unpause_graph_tracing();
3213
3214 return len;
3215 }
3216 EXPORT_SYMBOL_GPL(trace_vbprintk);
3217
3218 __printf(3, 0)
3219 static int
3220 __trace_array_vprintk(struct ring_buffer *buffer,
3221 unsigned long ip, const char *fmt, va_list args)
3222 {
3223 struct trace_event_call *call = &event_print;
3224 struct ring_buffer_event *event;
3225 int len = 0, size, pc;
3226 struct print_entry *entry;
3227 unsigned long flags;
3228 char *tbuffer;
3229
3230 if (tracing_disabled || tracing_selftest_running)
3231 return 0;
3232
3233 /* Don't pollute graph traces with trace_vprintk internals */
3234 pause_graph_tracing();
3235
3236 pc = preempt_count();
3237 preempt_disable_notrace();
3238
3239
3240 tbuffer = get_trace_buf();
3241 if (!tbuffer) {
3242 len = 0;
3243 goto out_nobuffer;
3244 }
3245
3246 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3247
3248 local_save_flags(flags);
3249 size = sizeof(*entry) + len + 1;
3250 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3251 flags, pc);
3252 if (!event)
3253 goto out;
3254 entry = ring_buffer_event_data(event);
3255 entry->ip = ip;
3256
3257 memcpy(&entry->buf, tbuffer, len + 1);
3258 if (!call_filter_check_discard(call, entry, buffer, event)) {
3259 __buffer_unlock_commit(buffer, event);
3260 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3261 }
3262
3263 out:
3264 put_trace_buf();
3265
3266 out_nobuffer:
3267 preempt_enable_notrace();
3268 unpause_graph_tracing();
3269
3270 return len;
3271 }
3272
3273 __printf(3, 0)
3274 int trace_array_vprintk(struct trace_array *tr,
3275 unsigned long ip, const char *fmt, va_list args)
3276 {
3277 return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3278 }
3279
3280 __printf(3, 0)
3281 int trace_array_printk(struct trace_array *tr,
3282 unsigned long ip, const char *fmt, ...)
3283 {
3284 int ret;
3285 va_list ap;
3286
3287 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3288 return 0;
3289
3290 if (!tr)
3291 return -ENOENT;
3292
3293 va_start(ap, fmt);
3294 ret = trace_array_vprintk(tr, ip, fmt, ap);
3295 va_end(ap);
3296 return ret;
3297 }
3298 EXPORT_SYMBOL_GPL(trace_array_printk);
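
/*
 * Usage sketch (illustrative): given a handle to a trace instance, tr
 * (obtained elsewhere; how it is looked up is outside this snippet), a
 * driver can log printk-style messages into that instance only. "qid" and
 * "delay_ms" are placeholders.
 *
 *	trace_array_printk(tr, _THIS_IP_,
 *			   "queue %d stalled for %lu ms\n", qid, delay_ms);
 */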
3299
3300 __printf(3, 4)
3301 int trace_array_printk_buf(struct ring_buffer *buffer,
3302 unsigned long ip, const char *fmt, ...)
3303 {
3304 int ret;
3305 va_list ap;
3306
3307 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3308 return 0;
3309
3310 va_start(ap, fmt);
3311 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3312 va_end(ap);
3313 return ret;
3314 }
3315
3316 __printf(2, 0)
3317 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3318 {
3319 return trace_array_vprintk(&global_trace, ip, fmt, args);
3320 }
3321 EXPORT_SYMBOL_GPL(trace_vprintk);
3322
3323 static void trace_iterator_increment(struct trace_iterator *iter)
3324 {
3325 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3326
3327 iter->idx++;
3328 if (buf_iter)
3329 ring_buffer_read(buf_iter, NULL);
3330 }
3331
3332 static struct trace_entry *
3333 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3334 unsigned long *lost_events)
3335 {
3336 struct ring_buffer_event *event;
3337 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3338
3339 if (buf_iter)
3340 event = ring_buffer_iter_peek(buf_iter, ts);
3341 else
3342 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3343 lost_events);
3344
3345 if (event) {
3346 iter->ent_size = ring_buffer_event_length(event);
3347 return ring_buffer_event_data(event);
3348 }
3349 iter->ent_size = 0;
3350 return NULL;
3351 }
3352
3353 static struct trace_entry *
3354 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3355 unsigned long *missing_events, u64 *ent_ts)
3356 {
3357 struct ring_buffer *buffer = iter->trace_buffer->buffer;
3358 struct trace_entry *ent, *next = NULL;
3359 unsigned long lost_events = 0, next_lost = 0;
3360 int cpu_file = iter->cpu_file;
3361 u64 next_ts = 0, ts;
3362 int next_cpu = -1;
3363 int next_size = 0;
3364 int cpu;
3365
3366 /*
3367 * If we are in a per_cpu trace file, don't bother iterating over
3368 * all CPUs; peek at that CPU directly.
3369 */
3370 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3371 if (ring_buffer_empty_cpu(buffer, cpu_file))
3372 return NULL;
3373 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3374 if (ent_cpu)
3375 *ent_cpu = cpu_file;
3376
3377 return ent;
3378 }
3379
3380 for_each_tracing_cpu(cpu) {
3381
3382 if (ring_buffer_empty_cpu(buffer, cpu))
3383 continue;
3384
3385 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3386
3387 /*
3388 * Pick the entry with the smallest timestamp:
3389 */
3390 if (ent && (!next || ts < next_ts)) {
3391 next = ent;
3392 next_cpu = cpu;
3393 next_ts = ts;
3394 next_lost = lost_events;
3395 next_size = iter->ent_size;
3396 }
3397 }
3398
3399 iter->ent_size = next_size;
3400
3401 if (ent_cpu)
3402 *ent_cpu = next_cpu;
3403
3404 if (ent_ts)
3405 *ent_ts = next_ts;
3406
3407 if (missing_events)
3408 *missing_events = next_lost;
3409
3410 return next;
3411 }
3412
3413 /* Find the next real entry, without updating the iterator itself */
3414 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3415 int *ent_cpu, u64 *ent_ts)
3416 {
3417 return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3418 }
3419
3420 /* Find the next real entry, and increment the iterator to the next entry */
3421 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3422 {
3423 iter->ent = __find_next_entry(iter, &iter->cpu,
3424 &iter->lost_events, &iter->ts);
3425
3426 if (iter->ent)
3427 trace_iterator_increment(iter);
3428
3429 return iter->ent ? iter : NULL;
3430 }
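
/*
 * Usage sketch (illustrative): consumers that want the merged,
 * timestamp-ordered stream simply loop on the incrementing variant:
 *
 *	while (trace_find_next_entry_inc(iter)) {
 *		... inspect iter->ent, iter->cpu and iter->ts ...
 *	}
 */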
3431
3432 static void trace_consume(struct trace_iterator *iter)
3433 {
3434 ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3435 &iter->lost_events);
3436 }
3437
3438 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3439 {
3440 struct trace_iterator *iter = m->private;
3441 int i = (int)*pos;
3442 void *ent;
3443
3444 WARN_ON_ONCE(iter->leftover);
3445
3446 (*pos)++;
3447
3448 /* can't go backwards */
3449 if (iter->idx > i)
3450 return NULL;
3451
3452 if (iter->idx < 0)
3453 ent = trace_find_next_entry_inc(iter);
3454 else
3455 ent = iter;
3456
3457 while (ent && iter->idx < i)
3458 ent = trace_find_next_entry_inc(iter);
3459
3460 iter->pos = *pos;
3461
3462 return ent;
3463 }
3464
3465 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3466 {
3467 struct ring_buffer_event *event;
3468 struct ring_buffer_iter *buf_iter;
3469 unsigned long entries = 0;
3470 u64 ts;
3471
3472 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3473
3474 buf_iter = trace_buffer_iter(iter, cpu);
3475 if (!buf_iter)
3476 return;
3477
3478 ring_buffer_iter_reset(buf_iter);
3479
3480 /*
3481 * With the max latency tracers, a reset may never have taken
3482 * place on a cpu. This shows up as the timestamp being before
3483 * the start of the buffer.
3484 */
3485 while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3486 if (ts >= iter->trace_buffer->time_start)
3487 break;
3488 entries++;
3489 ring_buffer_read(buf_iter, NULL);
3490 }
3491
3492 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3493 }
3494
3495 /*
3496 * The current tracer is copied to avoid holding a global lock
3497 * all around.
3498 */
3499 static void *s_start(struct seq_file *m, loff_t *pos)
3500 {
3501 struct trace_iterator *iter = m->private;
3502 struct trace_array *tr = iter->tr;
3503 int cpu_file = iter->cpu_file;
3504 void *p = NULL;
3505 loff_t l = 0;
3506 int cpu;
3507
3508 /*
3509 * copy the tracer to avoid using a global lock all around.
3510 * iter->trace is a copy of current_trace, the pointer to the
3511 * name may be used instead of a strcmp(), as iter->trace->name
3512 * will point to the same string as current_trace->name.
3513 */
3514 mutex_lock(&trace_types_lock);
3515 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
3516 /* Close iter->trace before switching to the new current tracer */
3517 if (iter->trace->close)
3518 iter->trace->close(iter);
3519 *iter->trace = *tr->current_trace;
3520 /* Reopen the new current tracer */
3521 if (iter->trace->open)
3522 iter->trace->open(iter);
3523 }
3524 mutex_unlock(&trace_types_lock);
3525
3526 #ifdef CONFIG_TRACER_MAX_TRACE
3527 if (iter->snapshot && iter->trace->use_max_tr)
3528 return ERR_PTR(-EBUSY);
3529 #endif
3530
3531 if (*pos != iter->pos) {
3532 iter->ent = NULL;
3533 iter->cpu = 0;
3534 iter->idx = -1;
3535
3536 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3537 for_each_tracing_cpu(cpu)
3538 tracing_iter_reset(iter, cpu);
3539 } else
3540 tracing_iter_reset(iter, cpu_file);
3541
3542 iter->leftover = 0;
3543 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3544 ;
3545
3546 } else {
3547 /*
3548 * If we overflowed the seq_file before, then we want
3549 * to just reuse the trace_seq buffer again.
3550 */
3551 if (iter->leftover)
3552 p = iter;
3553 else {
3554 l = *pos - 1;
3555 p = s_next(m, p, &l);
3556 }
3557 }
3558
3559 trace_event_read_lock();
3560 trace_access_lock(cpu_file);
3561 return p;
3562 }
3563
3564 static void s_stop(struct seq_file *m, void *p)
3565 {
3566 struct trace_iterator *iter = m->private;
3567
3568 #ifdef CONFIG_TRACER_MAX_TRACE
3569 if (iter->snapshot && iter->trace->use_max_tr)
3570 return;
3571 #endif
3572
3573 trace_access_unlock(iter->cpu_file);
3574 trace_event_read_unlock();
3575 }
3576
3577 static void
3578 get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total,
3579 unsigned long *entries, int cpu)
3580 {
3581 unsigned long count;
3582
3583 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3584 /*
3585 * If this buffer has skipped entries, then we hold all
3586 * entries for the trace and we need to ignore the
3587 * ones before the time stamp.
3588 */
3589 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3590 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3591 /* total is the same as the entries */
3592 *total = count;
3593 } else
3594 *total = count +
3595 ring_buffer_overrun_cpu(buf->buffer, cpu);
3596 *entries = count;
3597 }
3598
3599 static void
3600 get_total_entries(struct trace_buffer *buf,
3601 unsigned long *total, unsigned long *entries)
3602 {
3603 unsigned long t, e;
3604 int cpu;
3605
3606 *total = 0;
3607 *entries = 0;
3608
3609 for_each_tracing_cpu(cpu) {
3610 get_total_entries_cpu(buf, &t, &e, cpu);
3611 *total += t;
3612 *entries += e;
3613 }
3614 }
3615
3616 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3617 {
3618 unsigned long total, entries;
3619
3620 if (!tr)
3621 tr = &global_trace;
3622
3623 get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu);
3624
3625 return entries;
3626 }
3627
3628 unsigned long trace_total_entries(struct trace_array *tr)
3629 {
3630 unsigned long total, entries;
3631
3632 if (!tr)
3633 tr = &global_trace;
3634
3635 get_total_entries(&tr->trace_buffer, &total, &entries);
3636
3637 return entries;
3638 }
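
/*
 * Usage sketch (illustrative): a quick health check of how many events the
 * global buffer currently holds (overruns excluded):
 *
 *	unsigned long n = trace_total_entries(NULL);
 *
 *	pr_info("ftrace currently holds %lu entries\n", n);
 *
 * Passing NULL selects global_trace, as the helpers above show.
 */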
3639
3640 static void print_lat_help_header(struct seq_file *m)
3641 {
3642 seq_puts(m, "# _------=> CPU# \n"
3643 "# / _-----=> irqs-off \n"
3644 "# | / _----=> need-resched \n"
3645 "# || / _---=> hardirq/softirq \n"
3646 "# ||| / _--=> preempt-depth \n"
3647 "# |||| / delay \n"
3648 "# cmd pid ||||| time | caller \n"
3649 "# \\ / ||||| \\ | / \n");
3650 }
3651
3652 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3653 {
3654 unsigned long total;
3655 unsigned long entries;
3656
3657 get_total_entries(buf, &total, &entries);
3658 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
3659 entries, total, num_online_cpus());
3660 seq_puts(m, "#\n");
3661 }
3662
3663 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3664 unsigned int flags)
3665 {
3666 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3667
3668 print_event_info(buf, m);
3669
3670 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
3671 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
3672 }
3673
3674 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3675 unsigned int flags)
3676 {
3677 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3678 const char *space = " ";
3679 int prec = tgid ? 12 : 2;
3680
3681 print_event_info(buf, m);
3682
3683 seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
3684 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
3685 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
3686 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
3687 seq_printf(m, "# %.*s||| / delay\n", prec, space);
3688 seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID ");
3689 seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | ");
3690 }
3691
3692 void
3693 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3694 {
3695 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3696 struct trace_buffer *buf = iter->trace_buffer;
3697 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3698 struct tracer *type = iter->trace;
3699 unsigned long entries;
3700 unsigned long total;
3701 const char *name = "preemption";
3702
3703 name = type->name;
3704
3705 get_total_entries(buf, &total, &entries);
3706
3707 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3708 name, UTS_RELEASE);
3709 seq_puts(m, "# -----------------------------------"
3710 "---------------------------------\n");
3711 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3712 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3713 nsecs_to_usecs(data->saved_latency),
3714 entries,
3715 total,
3716 buf->cpu,
3717 #if defined(CONFIG_PREEMPT_NONE)
3718 "server",
3719 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3720 "desktop",
3721 #elif defined(CONFIG_PREEMPT)
3722 "preempt",
3723 #else
3724 "unknown",
3725 #endif
3726 /* These are reserved for later use */
3727 0, 0, 0, 0);
3728 #ifdef CONFIG_SMP
3729 seq_printf(m, " #P:%d)\n", num_online_cpus());
3730 #else
3731 seq_puts(m, ")\n");
3732 #endif
3733 seq_puts(m, "# -----------------\n");
3734 seq_printf(m, "# | task: %.16s-%d "
3735 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3736 data->comm, data->pid,
3737 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3738 data->policy, data->rt_priority);
3739 seq_puts(m, "# -----------------\n");
3740
3741 if (data->critical_start) {
3742 seq_puts(m, "# => started at: ");
3743 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3744 trace_print_seq(m, &iter->seq);
3745 seq_puts(m, "\n# => ended at: ");
3746 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3747 trace_print_seq(m, &iter->seq);
3748 seq_puts(m, "\n#\n");
3749 }
3750
3751 seq_puts(m, "#\n");
3752 }
3753
3754 static void test_cpu_buff_start(struct trace_iterator *iter)
3755 {
3756 struct trace_seq *s = &iter->seq;
3757 struct trace_array *tr = iter->tr;
3758
3759 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3760 return;
3761
3762 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3763 return;
3764
3765 if (cpumask_available(iter->started) &&
3766 cpumask_test_cpu(iter->cpu, iter->started))
3767 return;
3768
3769 if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3770 return;
3771
3772 if (cpumask_available(iter->started))
3773 cpumask_set_cpu(iter->cpu, iter->started);
3774
3775 /* Don't print started cpu buffer for the first entry of the trace */
3776 if (iter->idx > 1)
3777 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3778 iter->cpu);
3779 }
3780
3781 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3782 {
3783 struct trace_array *tr = iter->tr;
3784 struct trace_seq *s = &iter->seq;
3785 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3786 struct trace_entry *entry;
3787 struct trace_event *event;
3788
3789 entry = iter->ent;
3790
3791 test_cpu_buff_start(iter);
3792
3793 event = ftrace_find_event(entry->type);
3794
3795 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3796 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3797 trace_print_lat_context(iter);
3798 else
3799 trace_print_context(iter);
3800 }
3801
3802 if (trace_seq_has_overflowed(s))
3803 return TRACE_TYPE_PARTIAL_LINE;
3804
3805 if (event)
3806 return event->funcs->trace(iter, sym_flags, event);
3807
3808 trace_seq_printf(s, "Unknown type %d\n", entry->type);
3809
3810 return trace_handle_return(s);
3811 }
3812
3813 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3814 {
3815 struct trace_array *tr = iter->tr;
3816 struct trace_seq *s = &iter->seq;
3817 struct trace_entry *entry;
3818 struct trace_event *event;
3819
3820 entry = iter->ent;
3821
3822 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3823 trace_seq_printf(s, "%d %d %llu ",
3824 entry->pid, iter->cpu, iter->ts);
3825
3826 if (trace_seq_has_overflowed(s))
3827 return TRACE_TYPE_PARTIAL_LINE;
3828
3829 event = ftrace_find_event(entry->type);
3830 if (event)
3831 return event->funcs->raw(iter, 0, event);
3832
3833 trace_seq_printf(s, "%d ?\n", entry->type);
3834
3835 return trace_handle_return(s);
3836 }
3837
3838 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3839 {
3840 struct trace_array *tr = iter->tr;
3841 struct trace_seq *s = &iter->seq;
3842 unsigned char newline = '\n';
3843 struct trace_entry *entry;
3844 struct trace_event *event;
3845
3846 entry = iter->ent;
3847
3848 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3849 SEQ_PUT_HEX_FIELD(s, entry->pid);
3850 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3851 SEQ_PUT_HEX_FIELD(s, iter->ts);
3852 if (trace_seq_has_overflowed(s))
3853 return TRACE_TYPE_PARTIAL_LINE;
3854 }
3855
3856 event = ftrace_find_event(entry->type);
3857 if (event) {
3858 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3859 if (ret != TRACE_TYPE_HANDLED)
3860 return ret;
3861 }
3862
3863 SEQ_PUT_FIELD(s, newline);
3864
3865 return trace_handle_return(s);
3866 }
3867
3868 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3869 {
3870 struct trace_array *tr = iter->tr;
3871 struct trace_seq *s = &iter->seq;
3872 struct trace_entry *entry;
3873 struct trace_event *event;
3874
3875 entry = iter->ent;
3876
3877 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3878 SEQ_PUT_FIELD(s, entry->pid);
3879 SEQ_PUT_FIELD(s, iter->cpu);
3880 SEQ_PUT_FIELD(s, iter->ts);
3881 if (trace_seq_has_overflowed(s))
3882 return TRACE_TYPE_PARTIAL_LINE;
3883 }
3884
3885 event = ftrace_find_event(entry->type);
3886 return event ? event->funcs->binary(iter, 0, event) :
3887 TRACE_TYPE_HANDLED;
3888 }
3889
3890 int trace_empty(struct trace_iterator *iter)
3891 {
3892 struct ring_buffer_iter *buf_iter;
3893 int cpu;
3894
3895 /* If we are looking at one CPU buffer, only check that one */
3896 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3897 cpu = iter->cpu_file;
3898 buf_iter = trace_buffer_iter(iter, cpu);
3899 if (buf_iter) {
3900 if (!ring_buffer_iter_empty(buf_iter))
3901 return 0;
3902 } else {
3903 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3904 return 0;
3905 }
3906 return 1;
3907 }
3908
3909 for_each_tracing_cpu(cpu) {
3910 buf_iter = trace_buffer_iter(iter, cpu);
3911 if (buf_iter) {
3912 if (!ring_buffer_iter_empty(buf_iter))
3913 return 0;
3914 } else {
3915 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3916 return 0;
3917 }
3918 }
3919
3920 return 1;
3921 }
3922
3923 /* Called with trace_event_read_lock() held. */
3924 enum print_line_t print_trace_line(struct trace_iterator *iter)
3925 {
3926 struct trace_array *tr = iter->tr;
3927 unsigned long trace_flags = tr->trace_flags;
3928 enum print_line_t ret;
3929
3930 if (iter->lost_events) {
3931 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3932 iter->cpu, iter->lost_events);
3933 if (trace_seq_has_overflowed(&iter->seq))
3934 return TRACE_TYPE_PARTIAL_LINE;
3935 }
3936
3937 if (iter->trace && iter->trace->print_line) {
3938 ret = iter->trace->print_line(iter);
3939 if (ret != TRACE_TYPE_UNHANDLED)
3940 return ret;
3941 }
3942
3943 if (iter->ent->type == TRACE_BPUTS &&
3944 trace_flags & TRACE_ITER_PRINTK &&
3945 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3946 return trace_print_bputs_msg_only(iter);
3947
3948 if (iter->ent->type == TRACE_BPRINT &&
3949 trace_flags & TRACE_ITER_PRINTK &&
3950 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3951 return trace_print_bprintk_msg_only(iter);
3952
3953 if (iter->ent->type == TRACE_PRINT &&
3954 trace_flags & TRACE_ITER_PRINTK &&
3955 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3956 return trace_print_printk_msg_only(iter);
3957
3958 if (trace_flags & TRACE_ITER_BIN)
3959 return print_bin_fmt(iter);
3960
3961 if (trace_flags & TRACE_ITER_HEX)
3962 return print_hex_fmt(iter);
3963
3964 if (trace_flags & TRACE_ITER_RAW)
3965 return print_raw_fmt(iter);
3966
3967 return print_trace_fmt(iter);
3968 }
3969
3970 void trace_latency_header(struct seq_file *m)
3971 {
3972 struct trace_iterator *iter = m->private;
3973 struct trace_array *tr = iter->tr;
3974
3975 /* print nothing if the buffers are empty */
3976 if (trace_empty(iter))
3977 return;
3978
3979 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3980 print_trace_header(m, iter);
3981
3982 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3983 print_lat_help_header(m);
3984 }
3985
3986 void trace_default_header(struct seq_file *m)
3987 {
3988 struct trace_iterator *iter = m->private;
3989 struct trace_array *tr = iter->tr;
3990 unsigned long trace_flags = tr->trace_flags;
3991
3992 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3993 return;
3994
3995 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3996 /* print nothing if the buffers are empty */
3997 if (trace_empty(iter))
3998 return;
3999 print_trace_header(m, iter);
4000 if (!(trace_flags & TRACE_ITER_VERBOSE))
4001 print_lat_help_header(m);
4002 } else {
4003 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4004 if (trace_flags & TRACE_ITER_IRQ_INFO)
4005 print_func_help_header_irq(iter->trace_buffer,
4006 m, trace_flags);
4007 else
4008 print_func_help_header(iter->trace_buffer, m,
4009 trace_flags);
4010 }
4011 }
4012 }
4013
4014 static void test_ftrace_alive(struct seq_file *m)
4015 {
4016 if (!ftrace_is_dead())
4017 return;
4018 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4019 "# MAY BE MISSING FUNCTION EVENTS\n");
4020 }
4021
4022 #ifdef CONFIG_TRACER_MAX_TRACE
4023 static void show_snapshot_main_help(struct seq_file *m)
4024 {
4025 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4026 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4027 "# Takes a snapshot of the main buffer.\n"
4028 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4029 "# (Doesn't have to be '2' works with any number that\n"
4030 "# is not a '0' or '1')\n");
4031 }
4032
4033 static void show_snapshot_percpu_help(struct seq_file *m)
4034 {
4035 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4036 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4037 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4038 "# Takes a snapshot of the main buffer for this cpu.\n");
4039 #else
4040 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4041 "# Must use main snapshot file to allocate.\n");
4042 #endif
4043 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4044 "# (Doesn't have to be '2' works with any number that\n"
4045 "# is not a '0' or '1')\n");
4046 }
4047
4048 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4049 {
4050 if (iter->tr->allocated_snapshot)
4051 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4052 else
4053 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4054
4055 seq_puts(m, "# Snapshot commands:\n");
4056 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4057 show_snapshot_main_help(m);
4058 else
4059 show_snapshot_percpu_help(m);
4060 }
4061 #else
4062 /* Should never be called */
4063 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4064 #endif
4065
4066 static int s_show(struct seq_file *m, void *v)
4067 {
4068 struct trace_iterator *iter = v;
4069 int ret;
4070
4071 if (iter->ent == NULL) {
4072 if (iter->tr) {
4073 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4074 seq_puts(m, "#\n");
4075 test_ftrace_alive(m);
4076 }
4077 if (iter->snapshot && trace_empty(iter))
4078 print_snapshot_help(m, iter);
4079 else if (iter->trace && iter->trace->print_header)
4080 iter->trace->print_header(m);
4081 else
4082 trace_default_header(m);
4083
4084 } else if (iter->leftover) {
4085 /*
4086 * If we filled the seq_file buffer earlier, we
4087 * want to just show it now.
4088 */
4089 ret = trace_print_seq(m, &iter->seq);
4090
4091 /* ret should this time be zero, but you never know */
4092 iter->leftover = ret;
4093
4094 } else {
4095 ret = print_trace_line(iter);
4096 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4097 iter->seq.full = 0;
4098 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4099 }
4100 ret = trace_print_seq(m, &iter->seq);
4101 /*
4102 * If we overflow the seq_file buffer, then it will
4103 * ask us for this data again at start up.
4104 * Use that instead.
4105 * ret is 0 if seq_file write succeeded.
4106 * -1 otherwise.
4107 */
4108 iter->leftover = ret;
4109 }
4110
4111 return 0;
4112 }
4113
4114 /*
4115 * Should be used after trace_array_get(), trace_types_lock
4116 * ensures that i_cdev was already initialized.
4117 */
4118 static inline int tracing_get_cpu(struct inode *inode)
4119 {
4120 if (inode->i_cdev) /* See trace_create_cpu_file() */
4121 return (long)inode->i_cdev - 1;
4122 return RING_BUFFER_ALL_CPUS;
4123 }
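/*
 * Editor's sketch (an assumption inferred from the comment above, not taken
 * from trace_create_cpu_file() itself): the per-CPU trace files are assumed
 * to store "cpu + 1" in i_cdev so that a NULL i_cdev can mean "whole buffer":
 *
 *	i_cdev == (void *)3  ->  tracing_get_cpu() returns 2
 *	i_cdev == NULL       ->  tracing_get_cpu() returns RING_BUFFER_ALL_CPUS
 */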
4124
4125 static const struct seq_operations tracer_seq_ops = {
4126 .start = s_start,
4127 .next = s_next,
4128 .stop = s_stop,
4129 .show = s_show,
4130 };
4131
4132 static struct trace_iterator *
4133 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4134 {
4135 struct trace_array *tr = inode->i_private;
4136 struct trace_iterator *iter;
4137 int cpu;
4138
4139 if (tracing_disabled)
4140 return ERR_PTR(-ENODEV);
4141
4142 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4143 if (!iter)
4144 return ERR_PTR(-ENOMEM);
4145
4146 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4147 GFP_KERNEL);
4148 if (!iter->buffer_iter)
4149 goto release;
4150
4151 /*
4152 * We make a copy of the current tracer to avoid concurrent
4153 * changes on it while we are reading.
4154 */
4155 mutex_lock(&trace_types_lock);
4156 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4157 if (!iter->trace)
4158 goto fail;
4159
4160 *iter->trace = *tr->current_trace;
4161
4162 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4163 goto fail;
4164
4165 iter->tr = tr;
4166
4167 #ifdef CONFIG_TRACER_MAX_TRACE
4168 /* Currently only the top directory has a snapshot */
4169 if (tr->current_trace->print_max || snapshot)
4170 iter->trace_buffer = &tr->max_buffer;
4171 else
4172 #endif
4173 iter->trace_buffer = &tr->trace_buffer;
4174 iter->snapshot = snapshot;
4175 iter->pos = -1;
4176 iter->cpu_file = tracing_get_cpu(inode);
4177 mutex_init(&iter->mutex);
4178
4179 /* Notify the tracer early; before we stop tracing. */
4180 if (iter->trace && iter->trace->open)
4181 iter->trace->open(iter);
4182
4183 /* Annotate start of buffers if we had overruns */
4184 if (ring_buffer_overruns(iter->trace_buffer->buffer))
4185 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4186
4187 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4188 if (trace_clocks[tr->clock_id].in_ns)
4189 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4190
4191 /* stop the trace while dumping if we are not opening "snapshot" */
4192 if (!iter->snapshot)
4193 tracing_stop_tr(tr);
4194
4195 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4196 for_each_tracing_cpu(cpu) {
4197 iter->buffer_iter[cpu] =
4198 ring_buffer_read_prepare(iter->trace_buffer->buffer,
4199 cpu, GFP_KERNEL);
4200 }
4201 ring_buffer_read_prepare_sync();
4202 for_each_tracing_cpu(cpu) {
4203 ring_buffer_read_start(iter->buffer_iter[cpu]);
4204 tracing_iter_reset(iter, cpu);
4205 }
4206 } else {
4207 cpu = iter->cpu_file;
4208 iter->buffer_iter[cpu] =
4209 ring_buffer_read_prepare(iter->trace_buffer->buffer,
4210 cpu, GFP_KERNEL);
4211 ring_buffer_read_prepare_sync();
4212 ring_buffer_read_start(iter->buffer_iter[cpu]);
4213 tracing_iter_reset(iter, cpu);
4214 }
4215
4216 mutex_unlock(&trace_types_lock);
4217
4218 return iter;
4219
4220 fail:
4221 mutex_unlock(&trace_types_lock);
4222 kfree(iter->trace);
4223 kfree(iter->buffer_iter);
4224 release:
4225 seq_release_private(inode, file);
4226 return ERR_PTR(-ENOMEM);
4227 }
4228
4229 int tracing_open_generic(struct inode *inode, struct file *filp)
4230 {
4231 int ret;
4232
4233 ret = tracing_check_open_get_tr(NULL);
4234 if (ret)
4235 return ret;
4236
4237 filp->private_data = inode->i_private;
4238 return 0;
4239 }
4240
4241 bool tracing_is_disabled(void)
4242 {
4243 return (tracing_disabled) ? true : false;
4244 }
4245
4246 /*
4247 * Open and update trace_array ref count.
4248 * Must have the current trace_array passed to it.
4249 */
4250 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4251 {
4252 struct trace_array *tr = inode->i_private;
4253 int ret;
4254
4255 ret = tracing_check_open_get_tr(tr);
4256 if (ret)
4257 return ret;
4258
4259 filp->private_data = inode->i_private;
4260
4261 return 0;
4262 }
4263
4264 /*
4265 * The private pointer of the inode is the trace_event_file.
4266 * Update the tr ref count associated to it.
4267 */
4268 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4269 {
4270 struct trace_event_file *file = inode->i_private;
4271 int ret;
4272
4273 ret = tracing_check_open_get_tr(file->tr);
4274 if (ret)
4275 return ret;
4276
4277 filp->private_data = inode->i_private;
4278
4279 return 0;
4280 }
4281
4282 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4283 {
4284 struct trace_event_file *file = inode->i_private;
4285
4286 trace_array_put(file->tr);
4287
4288 return 0;
4289 }
4290
4291 static int tracing_release(struct inode *inode, struct file *file)
4292 {
4293 struct trace_array *tr = inode->i_private;
4294 struct seq_file *m = file->private_data;
4295 struct trace_iterator *iter;
4296 int cpu;
4297
4298 if (!(file->f_mode & FMODE_READ)) {
4299 trace_array_put(tr);
4300 return 0;
4301 }
4302
4303 /* Writes do not use seq_file */
4304 iter = m->private;
4305 mutex_lock(&trace_types_lock);
4306
4307 for_each_tracing_cpu(cpu) {
4308 if (iter->buffer_iter[cpu])
4309 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4310 }
4311
4312 if (iter->trace && iter->trace->close)
4313 iter->trace->close(iter);
4314
4315 if (!iter->snapshot)
4316 /* reenable tracing if it was previously enabled */
4317 tracing_start_tr(tr);
4318
4319 __trace_array_put(tr);
4320
4321 mutex_unlock(&trace_types_lock);
4322
4323 mutex_destroy(&iter->mutex);
4324 free_cpumask_var(iter->started);
4325 kfree(iter->trace);
4326 kfree(iter->buffer_iter);
4327 seq_release_private(inode, file);
4328
4329 return 0;
4330 }
4331
4332 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4333 {
4334 struct trace_array *tr = inode->i_private;
4335
4336 trace_array_put(tr);
4337 return 0;
4338 }
4339
4340 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4341 {
4342 struct trace_array *tr = inode->i_private;
4343
4344 trace_array_put(tr);
4345
4346 return single_release(inode, file);
4347 }
4348
4349 static int tracing_open(struct inode *inode, struct file *file)
4350 {
4351 struct trace_array *tr = inode->i_private;
4352 struct trace_iterator *iter;
4353 int ret;
4354
4355 ret = tracing_check_open_get_tr(tr);
4356 if (ret)
4357 return ret;
4358
4359 /* If this file was open for write, then erase contents */
4360 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4361 int cpu = tracing_get_cpu(inode);
4362 struct trace_buffer *trace_buf = &tr->trace_buffer;
4363
4364 #ifdef CONFIG_TRACER_MAX_TRACE
4365 if (tr->current_trace->print_max)
4366 trace_buf = &tr->max_buffer;
4367 #endif
4368
4369 if (cpu == RING_BUFFER_ALL_CPUS)
4370 tracing_reset_online_cpus(trace_buf);
4371 else
4372 tracing_reset_cpu(trace_buf, cpu);
4373 }
4374
4375 if (file->f_mode & FMODE_READ) {
4376 iter = __tracing_open(inode, file, false);
4377 if (IS_ERR(iter))
4378 ret = PTR_ERR(iter);
4379 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4380 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4381 }
4382
4383 if (ret < 0)
4384 trace_array_put(tr);
4385
4386 return ret;
4387 }
4388
4389 /*
4390 * Some tracers are not suitable for instance buffers.
4391 * A tracer is always available for the global array (toplevel)
4392 * or if it explicitly states that it is.
4393 */
4394 static bool
4395 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4396 {
4397 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4398 }
4399
4400 /* Find the next tracer that this trace array may use */
4401 static struct tracer *
4402 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4403 {
4404 while (t && !trace_ok_for_array(t, tr))
4405 t = t->next;
4406
4407 return t;
4408 }
4409
4410 static void *
4411 t_next(struct seq_file *m, void *v, loff_t *pos)
4412 {
4413 struct trace_array *tr = m->private;
4414 struct tracer *t = v;
4415
4416 (*pos)++;
4417
4418 if (t)
4419 t = get_tracer_for_array(tr, t->next);
4420
4421 return t;
4422 }
4423
4424 static void *t_start(struct seq_file *m, loff_t *pos)
4425 {
4426 struct trace_array *tr = m->private;
4427 struct tracer *t;
4428 loff_t l = 0;
4429
4430 mutex_lock(&trace_types_lock);
4431
4432 t = get_tracer_for_array(tr, trace_types);
4433 for (; t && l < *pos; t = t_next(m, t, &l))
4434 ;
4435
4436 return t;
4437 }
4438
4439 static void t_stop(struct seq_file *m, void *p)
4440 {
4441 mutex_unlock(&trace_types_lock);
4442 }
4443
4444 static int t_show(struct seq_file *m, void *v)
4445 {
4446 struct tracer *t = v;
4447
4448 if (!t)
4449 return 0;
4450
4451 seq_puts(m, t->name);
4452 if (t->next)
4453 seq_putc(m, ' ');
4454 else
4455 seq_putc(m, '\n');
4456
4457 return 0;
4458 }
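/*
 * Editor's note (illustrative): the seq iterator above walks the tracers
 * allowed for this trace array and t_show() prints them space-separated, so
 * reading "available_tracers" yields one line such as (config dependent):
 *
 *	blk function_graph function nop
 */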
4459
4460 static const struct seq_operations show_traces_seq_ops = {
4461 .start = t_start,
4462 .next = t_next,
4463 .stop = t_stop,
4464 .show = t_show,
4465 };
4466
4467 static int show_traces_open(struct inode *inode, struct file *file)
4468 {
4469 struct trace_array *tr = inode->i_private;
4470 struct seq_file *m;
4471 int ret;
4472
4473 ret = tracing_check_open_get_tr(tr);
4474 if (ret)
4475 return ret;
4476
4477 ret = seq_open(file, &show_traces_seq_ops);
4478 if (ret) {
4479 trace_array_put(tr);
4480 return ret;
4481 }
4482
4483 m = file->private_data;
4484 m->private = tr;
4485
4486 return 0;
4487 }
4488
4489 static int show_traces_release(struct inode *inode, struct file *file)
4490 {
4491 struct trace_array *tr = inode->i_private;
4492
4493 trace_array_put(tr);
4494 return seq_release(inode, file);
4495 }
4496
4497 static ssize_t
4498 tracing_write_stub(struct file *filp, const char __user *ubuf,
4499 size_t count, loff_t *ppos)
4500 {
4501 return count;
4502 }
4503
4504 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4505 {
4506 int ret;
4507
4508 if (file->f_mode & FMODE_READ)
4509 ret = seq_lseek(file, offset, whence);
4510 else
4511 file->f_pos = ret = 0;
4512
4513 return ret;
4514 }
4515
4516 static const struct file_operations tracing_fops = {
4517 .open = tracing_open,
4518 .read = seq_read,
4519 .write = tracing_write_stub,
4520 .llseek = tracing_lseek,
4521 .release = tracing_release,
4522 };
4523
4524 static const struct file_operations show_traces_fops = {
4525 .open = show_traces_open,
4526 .read = seq_read,
4527 .llseek = seq_lseek,
4528 .release = show_traces_release,
4529 };
4530
4531 static ssize_t
4532 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4533 size_t count, loff_t *ppos)
4534 {
4535 struct trace_array *tr = file_inode(filp)->i_private;
4536 char *mask_str;
4537 int len;
4538
4539 len = snprintf(NULL, 0, "%*pb\n",
4540 cpumask_pr_args(tr->tracing_cpumask)) + 1;
4541 mask_str = kmalloc(len, GFP_KERNEL);
4542 if (!mask_str)
4543 return -ENOMEM;
4544
4545 len = snprintf(mask_str, len, "%*pb\n",
4546 cpumask_pr_args(tr->tracing_cpumask));
4547 if (len >= count) {
4548 count = -EINVAL;
4549 goto out_err;
4550 }
4551 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4552
4553 out_err:
4554 kfree(mask_str);
4555
4556 return count;
4557 }
4558
4559 static ssize_t
4560 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4561 size_t count, loff_t *ppos)
4562 {
4563 struct trace_array *tr = file_inode(filp)->i_private;
4564 cpumask_var_t tracing_cpumask_new;
4565 int err, cpu;
4566
4567 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4568 return -ENOMEM;
4569
4570 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4571 if (err)
4572 goto err_unlock;
4573
4574 local_irq_disable();
4575 arch_spin_lock(&tr->max_lock);
4576 for_each_tracing_cpu(cpu) {
4577 /*
4578 * Increase/decrease the disabled counter if we are
4579 * about to flip a bit in the cpumask:
4580 */
4581 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4582 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4583 atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4584 ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4585 }
4586 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4587 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4588 atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4589 ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4590 }
4591 }
4592 arch_spin_unlock(&tr->max_lock);
4593 local_irq_enable();
4594
4595 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4596 free_cpumask_var(tracing_cpumask_new);
4597
4598 return count;
4599
4600 err_unlock:
4601 free_cpumask_var(tracing_cpumask_new);
4602
4603 return err;
4604 }
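/*
 * Editor's usage sketch (assumes tracefs is mounted at /sys/kernel/tracing):
 *
 *	# limit tracing to CPUs 0 and 1; CPUs dropped from the mask have
 *	# their per-cpu recording disabled by the loop above
 *	echo 3 > /sys/kernel/tracing/tracing_cpumask
 */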
4605
4606 static const struct file_operations tracing_cpumask_fops = {
4607 .open = tracing_open_generic_tr,
4608 .read = tracing_cpumask_read,
4609 .write = tracing_cpumask_write,
4610 .release = tracing_release_generic_tr,
4611 .llseek = generic_file_llseek,
4612 };
4613
4614 static int tracing_trace_options_show(struct seq_file *m, void *v)
4615 {
4616 struct tracer_opt *trace_opts;
4617 struct trace_array *tr = m->private;
4618 u32 tracer_flags;
4619 int i;
4620
4621 mutex_lock(&trace_types_lock);
4622 tracer_flags = tr->current_trace->flags->val;
4623 trace_opts = tr->current_trace->flags->opts;
4624
4625 for (i = 0; trace_options[i]; i++) {
4626 if (tr->trace_flags & (1 << i))
4627 seq_printf(m, "%s\n", trace_options[i]);
4628 else
4629 seq_printf(m, "no%s\n", trace_options[i]);
4630 }
4631
4632 for (i = 0; trace_opts[i].name; i++) {
4633 if (tracer_flags & trace_opts[i].bit)
4634 seq_printf(m, "%s\n", trace_opts[i].name);
4635 else
4636 seq_printf(m, "no%s\n", trace_opts[i].name);
4637 }
4638 mutex_unlock(&trace_types_lock);
4639
4640 return 0;
4641 }
4642
4643 static int __set_tracer_option(struct trace_array *tr,
4644 struct tracer_flags *tracer_flags,
4645 struct tracer_opt *opts, int neg)
4646 {
4647 struct tracer *trace = tracer_flags->trace;
4648 int ret;
4649
4650 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4651 if (ret)
4652 return ret;
4653
4654 if (neg)
4655 tracer_flags->val &= ~opts->bit;
4656 else
4657 tracer_flags->val |= opts->bit;
4658 return 0;
4659 }
4660
4661 /* Try to assign a tracer specific option */
4662 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4663 {
4664 struct tracer *trace = tr->current_trace;
4665 struct tracer_flags *tracer_flags = trace->flags;
4666 struct tracer_opt *opts = NULL;
4667 int i;
4668
4669 for (i = 0; tracer_flags->opts[i].name; i++) {
4670 opts = &tracer_flags->opts[i];
4671
4672 if (strcmp(cmp, opts->name) == 0)
4673 return __set_tracer_option(tr, trace->flags, opts, neg);
4674 }
4675
4676 return -EINVAL;
4677 }
4678
4679 /* Some tracers require overwrite to stay enabled */
4680 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4681 {
4682 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4683 return -1;
4684
4685 return 0;
4686 }
4687
4688 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4689 {
4690 int *map;
4691
4692 if ((mask == TRACE_ITER_RECORD_TGID) ||
4693 (mask == TRACE_ITER_RECORD_CMD))
4694 lockdep_assert_held(&event_mutex);
4695
4696 /* do nothing if flag is already set */
4697 if (!!(tr->trace_flags & mask) == !!enabled)
4698 return 0;
4699
4700 /* Give the tracer a chance to approve the change */
4701 if (tr->current_trace->flag_changed)
4702 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4703 return -EINVAL;
4704
4705 if (enabled)
4706 tr->trace_flags |= mask;
4707 else
4708 tr->trace_flags &= ~mask;
4709
4710 if (mask == TRACE_ITER_RECORD_CMD)
4711 trace_event_enable_cmd_record(enabled);
4712
4713 if (mask == TRACE_ITER_RECORD_TGID) {
4714 if (!tgid_map) {
4715 tgid_map_max = pid_max;
4716 map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
4717 GFP_KERNEL);
4718
4719 /*
4720 * Pairs with smp_load_acquire() in
4721 * trace_find_tgid_ptr() to ensure that if it observes
4722 * the tgid_map we just allocated then it also observes
4723 * the corresponding tgid_map_max value.
4724 */
4725 smp_store_release(&tgid_map, map);
4726 }
4727 if (!tgid_map) {
4728 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4729 return -ENOMEM;
4730 }
4731
4732 trace_event_enable_tgid_record(enabled);
4733 }
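	/*
	 * Editor's sketch of the assumed reader side (trace_find_tgid_ptr()
	 * lives elsewhere in this file; shown here only to illustrate the
	 * acquire/release pairing described in the comment above):
	 *
	 *	int *map = smp_load_acquire(&tgid_map);
	 *
	 *	if (!map || pid > tgid_map_max)
	 *		return NULL;
	 *	return &map[pid];
	 */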
4734
4735 if (mask == TRACE_ITER_EVENT_FORK)
4736 trace_event_follow_fork(tr, enabled);
4737
4738 if (mask == TRACE_ITER_FUNC_FORK)
4739 ftrace_pid_follow_fork(tr, enabled);
4740
4741 if (mask == TRACE_ITER_OVERWRITE) {
4742 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4743 #ifdef CONFIG_TRACER_MAX_TRACE
4744 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4745 #endif
4746 }
4747
4748 if (mask == TRACE_ITER_PRINTK) {
4749 trace_printk_start_stop_comm(enabled);
4750 trace_printk_control(enabled);
4751 }
4752
4753 return 0;
4754 }
4755
4756 static int trace_set_options(struct trace_array *tr, char *option)
4757 {
4758 char *cmp;
4759 int neg = 0;
4760 int ret;
4761 size_t orig_len = strlen(option);
4762 int len;
4763
4764 cmp = strstrip(option);
4765
4766 len = str_has_prefix(cmp, "no");
4767 if (len)
4768 neg = 1;
4769
4770 cmp += len;
4771
4772 mutex_lock(&event_mutex);
4773 mutex_lock(&trace_types_lock);
4774
4775 ret = match_string(trace_options, -1, cmp);
4776 /* If no option could be set, test the specific tracer options */
4777 if (ret < 0)
4778 ret = set_tracer_option(tr, cmp, neg);
4779 else
4780 ret = set_tracer_flag(tr, 1 << ret, !neg);
4781
4782 mutex_unlock(&trace_types_lock);
4783 mutex_unlock(&event_mutex);
4784
4785 /*
4786 * If the first trailing whitespace is replaced with '\0' by strstrip,
4787 * turn it back into a space.
4788 */
4789 if (orig_len > strlen(option))
4790 option[strlen(option)] = ' ';
4791
4792 return ret;
4793 }
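/*
 * Editor's usage sketch (tracefs path assumed): core flags and
 * tracer-specific options go through the same "trace_options" file, and a
 * "no" prefix clears a flag:
 *
 *	echo noirq-info       > /sys/kernel/tracing/trace_options
 *	echo func_stack_trace > /sys/kernel/tracing/trace_options
 */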
4794
4795 static void __init apply_trace_boot_options(void)
4796 {
4797 char *buf = trace_boot_options_buf;
4798 char *option;
4799
4800 while (true) {
4801 option = strsep(&buf, ",");
4802
4803 if (!option)
4804 break;
4805
4806 if (*option)
4807 trace_set_options(&global_trace, option);
4808
4809 /* Put back the comma to allow this to be called again */
4810 if (buf)
4811 *(buf - 1) = ',';
4812 }
4813 }
4814
4815 static ssize_t
4816 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4817 size_t cnt, loff_t *ppos)
4818 {
4819 struct seq_file *m = filp->private_data;
4820 struct trace_array *tr = m->private;
4821 char buf[64];
4822 int ret;
4823
4824 if (cnt >= sizeof(buf))
4825 return -EINVAL;
4826
4827 if (copy_from_user(buf, ubuf, cnt))
4828 return -EFAULT;
4829
4830 buf[cnt] = 0;
4831
4832 ret = trace_set_options(tr, buf);
4833 if (ret < 0)
4834 return ret;
4835
4836 *ppos += cnt;
4837
4838 return cnt;
4839 }
4840
4841 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4842 {
4843 struct trace_array *tr = inode->i_private;
4844 int ret;
4845
4846 ret = tracing_check_open_get_tr(tr);
4847 if (ret)
4848 return ret;
4849
4850 ret = single_open(file, tracing_trace_options_show, inode->i_private);
4851 if (ret < 0)
4852 trace_array_put(tr);
4853
4854 return ret;
4855 }
4856
4857 static const struct file_operations tracing_iter_fops = {
4858 .open = tracing_trace_options_open,
4859 .read = seq_read,
4860 .llseek = seq_lseek,
4861 .release = tracing_single_release_tr,
4862 .write = tracing_trace_options_write,
4863 };
4864
4865 static const char readme_msg[] =
4866 "tracing mini-HOWTO:\n\n"
4867 "# echo 0 > tracing_on : quick way to disable tracing\n"
4868 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4869 " Important files:\n"
4870 " trace\t\t\t- The static contents of the buffer\n"
4871 "\t\t\t To clear the buffer write into this file: echo > trace\n"
4872 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4873 " current_tracer\t- function and latency tracers\n"
4874 " available_tracers\t- list of configured tracers for current_tracer\n"
4875 " error_log\t- error log for failed commands (that support it)\n"
4876 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
4877 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
4878 " trace_clock\t\t-change the clock used to order events\n"
4879 " local: Per cpu clock but may not be synced across CPUs\n"
4880 " global: Synced across CPUs but slows tracing down.\n"
4881 " counter: Not a clock, but just an increment\n"
4882 " uptime: Jiffy counter from time of boot\n"
4883 " perf: Same clock that perf events use\n"
4884 #ifdef CONFIG_X86_64
4885 " x86-tsc: TSC cycle counter\n"
4886 #endif
4887 "\n timestamp_mode\t-view the mode used to timestamp events\n"
4888 " delta: Delta difference against a buffer-wide timestamp\n"
4889 " absolute: Absolute (standalone) timestamp\n"
4890 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4891 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4892 " tracing_cpumask\t- Limit which CPUs to trace\n"
4893 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4894 "\t\t\t Remove sub-buffer with rmdir\n"
4895 " trace_options\t\t- Set format or modify how tracing happens\n"
4896 "\t\t\t Disable an option by prefixing 'no' to the\n"
4897 "\t\t\t option name\n"
4898 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4899 #ifdef CONFIG_DYNAMIC_FTRACE
4900 "\n available_filter_functions - list of functions that can be filtered on\n"
4901 " set_ftrace_filter\t- echo function name in here to only trace these\n"
4902 "\t\t\t functions\n"
4903 "\t accepts: func_full_name or glob-matching-pattern\n"
4904 "\t modules: Can select a group via module\n"
4905 "\t Format: :mod:<module-name>\n"
4906 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
4907 "\t triggers: a command to perform when function is hit\n"
4908 "\t Format: <function>:<trigger>[:count]\n"
4909 "\t trigger: traceon, traceoff\n"
4910 "\t\t enable_event:<system>:<event>\n"
4911 "\t\t disable_event:<system>:<event>\n"
4912 #ifdef CONFIG_STACKTRACE
4913 "\t\t stacktrace\n"
4914 #endif
4915 #ifdef CONFIG_TRACER_SNAPSHOT
4916 "\t\t snapshot\n"
4917 #endif
4918 "\t\t dump\n"
4919 "\t\t cpudump\n"
4920 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
4921 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
4922 "\t The first one will disable tracing every time do_fault is hit\n"
4923 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
4924 "\t The first time do trap is hit and it disables tracing, the\n"
4925 "\t counter will decrement to 2. If tracing is already disabled,\n"
4926 "\t the counter will not decrement. It only decrements when the\n"
4927 "\t trigger did work\n"
4928 "\t To remove trigger without count:\n"
4929 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
4930 "\t To remove trigger with a count:\n"
4931 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4932 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
4933 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4934 "\t modules: Can select a group via module command :mod:\n"
4935 "\t Does not accept triggers\n"
4936 #endif /* CONFIG_DYNAMIC_FTRACE */
4937 #ifdef CONFIG_FUNCTION_TRACER
4938 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4939 "\t\t (function)\n"
4940 #endif
4941 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4942 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4943 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4944 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4945 #endif
4946 #ifdef CONFIG_TRACER_SNAPSHOT
4947 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
4948 "\t\t\t snapshot buffer. Read the contents for more\n"
4949 "\t\t\t information\n"
4950 #endif
4951 #ifdef CONFIG_STACK_TRACER
4952 " stack_trace\t\t- Shows the max stack trace when active\n"
4953 " stack_max_size\t- Shows current max stack size that was traced\n"
4954 "\t\t\t Write into this file to reset the max size (trigger a\n"
4955 "\t\t\t new trace)\n"
4956 #ifdef CONFIG_DYNAMIC_FTRACE
4957 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4958 "\t\t\t traces\n"
4959 #endif
4960 #endif /* CONFIG_STACK_TRACER */
4961 #ifdef CONFIG_DYNAMIC_EVENTS
4962 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4963 "\t\t\t Write into this file to define/undefine new trace events.\n"
4964 #endif
4965 #ifdef CONFIG_KPROBE_EVENTS
4966 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4967 "\t\t\t Write into this file to define/undefine new trace events.\n"
4968 #endif
4969 #ifdef CONFIG_UPROBE_EVENTS
4970 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4971 "\t\t\t Write into this file to define/undefine new trace events.\n"
4972 #endif
4973 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4974 "\t accepts: event-definitions (one definition per line)\n"
4975 "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
4976 "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4977 #ifdef CONFIG_HIST_TRIGGERS
4978 "\t s:[synthetic/]<event> <field> [<field>]\n"
4979 #endif
4980 "\t -:[<group>/]<event>\n"
4981 #ifdef CONFIG_KPROBE_EVENTS
4982 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4983 "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4984 #endif
4985 #ifdef CONFIG_UPROBE_EVENTS
4986 " place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4987 #endif
4988 "\t args: <name>=fetcharg[:type]\n"
4989 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4990 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4991 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4992 #else
4993 "\t $stack<index>, $stack, $retval, $comm,\n"
4994 #endif
4995 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
4996 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4997 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4998 "\t <type>\\[<array-size>\\]\n"
4999 #ifdef CONFIG_HIST_TRIGGERS
5000 "\t field: <stype> <name>;\n"
5001 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5002 "\t [unsigned] char/int/long\n"
5003 #endif
5004 #endif
5005 " events/\t\t- Directory containing all trace event subsystems:\n"
5006 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5007 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5008 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5009 "\t\t\t events\n"
5010 " filter\t\t- If set, only events passing filter are traced\n"
5011 " events/<system>/<event>/\t- Directory containing control files for\n"
5012 "\t\t\t <event>:\n"
5013 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5014 " filter\t\t- If set, only events passing filter are traced\n"
5015 " trigger\t\t- If set, a command to perform when event is hit\n"
5016 "\t Format: <trigger>[:count][if <filter>]\n"
5017 "\t trigger: traceon, traceoff\n"
5018 "\t enable_event:<system>:<event>\n"
5019 "\t disable_event:<system>:<event>\n"
5020 #ifdef CONFIG_HIST_TRIGGERS
5021 "\t enable_hist:<system>:<event>\n"
5022 "\t disable_hist:<system>:<event>\n"
5023 #endif
5024 #ifdef CONFIG_STACKTRACE
5025 "\t\t stacktrace\n"
5026 #endif
5027 #ifdef CONFIG_TRACER_SNAPSHOT
5028 "\t\t snapshot\n"
5029 #endif
5030 #ifdef CONFIG_HIST_TRIGGERS
5031 "\t\t hist (see below)\n"
5032 #endif
5033 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5034 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5035 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5036 "\t events/block/block_unplug/trigger\n"
5037 "\t The first disables tracing every time block_unplug is hit.\n"
5038 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5039 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5040 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5041 "\t Like function triggers, the counter is only decremented if it\n"
5042 "\t enabled or disabled tracing.\n"
5043 "\t To remove a trigger without a count:\n"
5044 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5045 "\t To remove a trigger with a count:\n"
5046 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5047 "\t Filters can be ignored when removing a trigger.\n"
5048 #ifdef CONFIG_HIST_TRIGGERS
5049 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5050 "\t Format: hist:keys=<field1[,field2,...]>\n"
5051 "\t [:values=<field1[,field2,...]>]\n"
5052 "\t [:sort=<field1[,field2,...]>]\n"
5053 "\t [:size=#entries]\n"
5054 "\t [:pause][:continue][:clear]\n"
5055 "\t [:name=histname1]\n"
5056 "\t [:<handler>.<action>]\n"
5057 "\t [if <filter>]\n\n"
5058 "\t Note, special fields can be used as well:\n"
5059 "\t common_timestamp - to record current timestamp\n"
5060 "\t common_cpu - to record the CPU the event happened on\n"
5061 "\n"
5062 "\t When a matching event is hit, an entry is added to a hash\n"
5063 "\t table using the key(s) and value(s) named, and the value of a\n"
5064 "\t sum called 'hitcount' is incremented. Keys and values\n"
5065 "\t correspond to fields in the event's format description. Keys\n"
5066 "\t can be any field, or the special string 'stacktrace'.\n"
5067 "\t Compound keys consisting of up to two fields can be specified\n"
5068 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5069 "\t fields. Sort keys consisting of up to two fields can be\n"
5070 "\t specified using the 'sort' keyword. The sort direction can\n"
5071 "\t be modified by appending '.descending' or '.ascending' to a\n"
5072 "\t sort field. The 'size' parameter can be used to specify more\n"
5073 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5074 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5075 "\t its histogram data will be shared with other triggers of the\n"
5076 "\t same name, and trigger hits will update this common data.\n\n"
5077 "\t Reading the 'hist' file for the event will dump the hash\n"
5078 "\t table in its entirety to stdout. If there are multiple hist\n"
5079 "\t triggers attached to an event, there will be a table for each\n"
5080 "\t trigger in the output. The table displayed for a named\n"
5081 "\t trigger will be the same as any other instance having the\n"
5082 "\t same name. The default format used to display a given field\n"
5083 "\t can be modified by appending any of the following modifiers\n"
5084 "\t to the field name, as applicable:\n\n"
5085 "\t .hex display a number as a hex value\n"
5086 "\t .sym display an address as a symbol\n"
5087 "\t .sym-offset display an address as a symbol and offset\n"
5088 "\t .execname display a common_pid as a program name\n"
5089 "\t .syscall display a syscall id as a syscall name\n"
5090 "\t .log2 display log2 value rather than raw number\n"
5091 "\t .usecs display a common_timestamp in microseconds\n\n"
5092 "\t The 'pause' parameter can be used to pause an existing hist\n"
5093 "\t trigger or to start a hist trigger but not log any events\n"
5094 "\t until told to do so. 'continue' can be used to start or\n"
5095 "\t restart a paused hist trigger.\n\n"
5096 "\t The 'clear' parameter will clear the contents of a running\n"
5097 "\t hist trigger and leave its current paused/active state\n"
5098 "\t unchanged.\n\n"
5099 "\t The enable_hist and disable_hist triggers can be used to\n"
5100 "\t have one event conditionally start and stop another event's\n"
5101 "\t already-attached hist trigger. The syntax is analogous to\n"
5102 "\t the enable_event and disable_event triggers.\n\n"
5103 "\t Hist trigger handlers and actions are executed whenever a\n"
5104 "\t a histogram entry is added or updated. They take the form:\n\n"
5105 "\t <handler>.<action>\n\n"
5106 "\t The available handlers are:\n\n"
5107 "\t onmatch(matching.event) - invoke on addition or update\n"
5108 "\t onmax(var) - invoke if var exceeds current max\n"
5109 "\t onchange(var) - invoke action if var changes\n\n"
5110 "\t The available actions are:\n\n"
5111 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5112 "\t save(field,...) - save current event fields\n"
5113 #ifdef CONFIG_TRACER_SNAPSHOT
5114 "\t snapshot() - snapshot the trace buffer\n"
5115 #endif
5116 #endif
5117 ;
5118
5119 static ssize_t
5120 tracing_readme_read(struct file *filp, char __user *ubuf,
5121 size_t cnt, loff_t *ppos)
5122 {
5123 return simple_read_from_buffer(ubuf, cnt, ppos,
5124 readme_msg, strlen(readme_msg));
5125 }
5126
5127 static const struct file_operations tracing_readme_fops = {
5128 .open = tracing_open_generic,
5129 .read = tracing_readme_read,
5130 .llseek = generic_file_llseek,
5131 };
5132
5133 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5134 {
5135 int pid = ++(*pos);
5136
5137 return trace_find_tgid_ptr(pid);
5138 }
5139
5140 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5141 {
5142 int pid = *pos;
5143
5144 return trace_find_tgid_ptr(pid);
5145 }
5146
5147 static void saved_tgids_stop(struct seq_file *m, void *v)
5148 {
5149 }
5150
5151 static int saved_tgids_show(struct seq_file *m, void *v)
5152 {
5153 int *entry = (int *)v;
5154 int pid = entry - tgid_map;
5155 int tgid = *entry;
5156
5157 if (tgid == 0)
5158 return SEQ_SKIP;
5159
5160 seq_printf(m, "%d %d\n", pid, tgid);
5161 return 0;
5162 }
5163
5164 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5165 .start = saved_tgids_start,
5166 .stop = saved_tgids_stop,
5167 .next = saved_tgids_next,
5168 .show = saved_tgids_show,
5169 };
5170
5171 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5172 {
5173 int ret;
5174
5175 ret = tracing_check_open_get_tr(NULL);
5176 if (ret)
5177 return ret;
5178
5179 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5180 }
5181
5182
5183 static const struct file_operations tracing_saved_tgids_fops = {
5184 .open = tracing_saved_tgids_open,
5185 .read = seq_read,
5186 .llseek = seq_lseek,
5187 .release = seq_release,
5188 };
5189
5190 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5191 {
5192 unsigned int *ptr = v;
5193
5194 if (*pos || m->count)
5195 ptr++;
5196
5197 (*pos)++;
5198
5199 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5200 ptr++) {
5201 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5202 continue;
5203
5204 return ptr;
5205 }
5206
5207 return NULL;
5208 }
5209
5210 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5211 {
5212 void *v;
5213 loff_t l = 0;
5214
5215 preempt_disable();
5216 arch_spin_lock(&trace_cmdline_lock);
5217
5218 v = &savedcmd->map_cmdline_to_pid[0];
5219 while (l <= *pos) {
5220 v = saved_cmdlines_next(m, v, &l);
5221 if (!v)
5222 return NULL;
5223 }
5224
5225 return v;
5226 }
5227
5228 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5229 {
5230 arch_spin_unlock(&trace_cmdline_lock);
5231 preempt_enable();
5232 }
5233
5234 static int saved_cmdlines_show(struct seq_file *m, void *v)
5235 {
5236 char buf[TASK_COMM_LEN];
5237 unsigned int *pid = v;
5238
5239 __trace_find_cmdline(*pid, buf);
5240 seq_printf(m, "%d %s\n", *pid, buf);
5241 return 0;
5242 }
5243
5244 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5245 .start = saved_cmdlines_start,
5246 .next = saved_cmdlines_next,
5247 .stop = saved_cmdlines_stop,
5248 .show = saved_cmdlines_show,
5249 };
5250
5251 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5252 {
5253 int ret;
5254
5255 ret = tracing_check_open_get_tr(NULL);
5256 if (ret)
5257 return ret;
5258
5259 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5260 }
5261
5262 static const struct file_operations tracing_saved_cmdlines_fops = {
5263 .open = tracing_saved_cmdlines_open,
5264 .read = seq_read,
5265 .llseek = seq_lseek,
5266 .release = seq_release,
5267 };
5268
5269 static ssize_t
5270 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5271 size_t cnt, loff_t *ppos)
5272 {
5273 char buf[64];
5274 int r;
5275
5276 preempt_disable();
5277 arch_spin_lock(&trace_cmdline_lock);
5278 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5279 arch_spin_unlock(&trace_cmdline_lock);
5280 preempt_enable();
5281
5282 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5283 }
5284
5285 static int tracing_resize_saved_cmdlines(unsigned int val)
5286 {
5287 struct saved_cmdlines_buffer *s, *savedcmd_temp;
5288
5289 s = allocate_cmdlines_buffer(val);
5290 if (!s)
5291 return -ENOMEM;
5292
5293 preempt_disable();
5294 arch_spin_lock(&trace_cmdline_lock);
5295 savedcmd_temp = savedcmd;
5296 savedcmd = s;
5297 arch_spin_unlock(&trace_cmdline_lock);
5298 preempt_enable();
5299 free_saved_cmdlines_buffer(savedcmd_temp);
5300
5301 return 0;
5302 }
5303
5304 static ssize_t
5305 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5306 size_t cnt, loff_t *ppos)
5307 {
5308 unsigned long val;
5309 int ret;
5310
5311 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5312 if (ret)
5313 return ret;
5314
5315 /* must have at least 1 entry and no more than PID_MAX_DEFAULT */
5316 if (!val || val > PID_MAX_DEFAULT)
5317 return -EINVAL;
5318
5319 ret = tracing_resize_saved_cmdlines((unsigned int)val);
5320 if (ret < 0)
5321 return ret;
5322
5323 *ppos += cnt;
5324
5325 return cnt;
5326 }
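/*
 * Editor's usage sketch (tracefs path assumed): enlarge the saved comm/pid
 * cache so more task names can be resolved in the trace output, e.g.
 *
 *	echo 4096 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * The value must be at least 1 and no more than PID_MAX_DEFAULT, as checked
 * above.
 */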
5327
5328 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5329 .open = tracing_open_generic,
5330 .read = tracing_saved_cmdlines_size_read,
5331 .write = tracing_saved_cmdlines_size_write,
5332 };
5333
5334 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5335 static union trace_eval_map_item *
5336 update_eval_map(union trace_eval_map_item *ptr)
5337 {
5338 if (!ptr->map.eval_string) {
5339 if (ptr->tail.next) {
5340 ptr = ptr->tail.next;
5341 /* Set ptr to the next real item (skip head) */
5342 ptr++;
5343 } else
5344 return NULL;
5345 }
5346 return ptr;
5347 }
5348
5349 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5350 {
5351 union trace_eval_map_item *ptr = v;
5352
5353 /*
5354 * Paranoid! If ptr points to end, we don't want to increment past it.
5355 * This really should never happen.
5356 */
5357 ptr = update_eval_map(ptr);
5358 if (WARN_ON_ONCE(!ptr))
5359 return NULL;
5360
5361 ptr++;
5362
5363 (*pos)++;
5364
5365 ptr = update_eval_map(ptr);
5366
5367 return ptr;
5368 }
5369
5370 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5371 {
5372 union trace_eval_map_item *v;
5373 loff_t l = 0;
5374
5375 mutex_lock(&trace_eval_mutex);
5376
5377 v = trace_eval_maps;
5378 if (v)
5379 v++;
5380
5381 while (v && l < *pos) {
5382 v = eval_map_next(m, v, &l);
5383 }
5384
5385 return v;
5386 }
5387
5388 static void eval_map_stop(struct seq_file *m, void *v)
5389 {
5390 mutex_unlock(&trace_eval_mutex);
5391 }
5392
5393 static int eval_map_show(struct seq_file *m, void *v)
5394 {
5395 union trace_eval_map_item *ptr = v;
5396
5397 seq_printf(m, "%s %ld (%s)\n",
5398 ptr->map.eval_string, ptr->map.eval_value,
5399 ptr->map.system);
5400
5401 return 0;
5402 }
5403
5404 static const struct seq_operations tracing_eval_map_seq_ops = {
5405 .start = eval_map_start,
5406 .next = eval_map_next,
5407 .stop = eval_map_stop,
5408 .show = eval_map_show,
5409 };
5410
5411 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5412 {
5413 int ret;
5414
5415 ret = tracing_check_open_get_tr(NULL);
5416 if (ret)
5417 return ret;
5418
5419 return seq_open(filp, &tracing_eval_map_seq_ops);
5420 }
5421
5422 static const struct file_operations tracing_eval_map_fops = {
5423 .open = tracing_eval_map_open,
5424 .read = seq_read,
5425 .llseek = seq_lseek,
5426 .release = seq_release,
5427 };
5428
5429 static inline union trace_eval_map_item *
5430 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5431 {
5432 /* Return tail of array given the head */
5433 return ptr + ptr->head.length + 1;
5434 }
5435
5436 static void
5437 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5438 int len)
5439 {
5440 struct trace_eval_map **stop;
5441 struct trace_eval_map **map;
5442 union trace_eval_map_item *map_array;
5443 union trace_eval_map_item *ptr;
5444
5445 stop = start + len;
5446
5447 /*
5448 * The trace_eval_maps contains the map plus a head and tail item,
5449 * where the head holds the module and length of array, and the
5450 * tail holds a pointer to the next list.
5451 */
5452 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5453 if (!map_array) {
5454 pr_warn("Unable to allocate trace eval mapping\n");
5455 return;
5456 }
5457
5458 mutex_lock(&trace_eval_mutex);
5459
5460 if (!trace_eval_maps)
5461 trace_eval_maps = map_array;
5462 else {
5463 ptr = trace_eval_maps;
5464 for (;;) {
5465 ptr = trace_eval_jmp_to_tail(ptr);
5466 if (!ptr->tail.next)
5467 break;
5468 ptr = ptr->tail.next;
5469
5470 }
5471 ptr->tail.next = map_array;
5472 }
5473 map_array->head.mod = mod;
5474 map_array->head.length = len;
5475 map_array++;
5476
5477 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5478 map_array->map = **map;
5479 map_array++;
5480 }
5481 memset(map_array, 0, sizeof(*map_array));
5482
5483 mutex_unlock(&trace_eval_mutex);
5484 }
5485
5486 static void trace_create_eval_file(struct dentry *d_tracer)
5487 {
5488 trace_create_file("eval_map", 0444, d_tracer,
5489 NULL, &tracing_eval_map_fops);
5490 }
5491
5492 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5493 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5494 static inline void trace_insert_eval_map_file(struct module *mod,
5495 struct trace_eval_map **start, int len) { }
5496 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5497
5498 static void trace_insert_eval_map(struct module *mod,
5499 struct trace_eval_map **start, int len)
5500 {
5501 struct trace_eval_map **map;
5502
5503 if (len <= 0)
5504 return;
5505
5506 map = start;
5507
5508 trace_event_eval_update(map, len);
5509
5510 trace_insert_eval_map_file(mod, start, len);
5511 }
5512
5513 static ssize_t
5514 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5515 size_t cnt, loff_t *ppos)
5516 {
5517 struct trace_array *tr = filp->private_data;
5518 char buf[MAX_TRACER_SIZE+2];
5519 int r;
5520
5521 mutex_lock(&trace_types_lock);
5522 r = sprintf(buf, "%s\n", tr->current_trace->name);
5523 mutex_unlock(&trace_types_lock);
5524
5525 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5526 }
5527
5528 int tracer_init(struct tracer *t, struct trace_array *tr)
5529 {
5530 tracing_reset_online_cpus(&tr->trace_buffer);
5531 return t->init(tr);
5532 }
5533
5534 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5535 {
5536 int cpu;
5537
5538 for_each_tracing_cpu(cpu)
5539 per_cpu_ptr(buf->data, cpu)->entries = val;
5540 }
5541
5542 #ifdef CONFIG_TRACER_MAX_TRACE
5543 /* resize @tr's buffer to the size of @size_tr's entries */
5544 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5545 struct trace_buffer *size_buf, int cpu_id)
5546 {
5547 int cpu, ret = 0;
5548
5549 if (cpu_id == RING_BUFFER_ALL_CPUS) {
5550 for_each_tracing_cpu(cpu) {
5551 ret = ring_buffer_resize(trace_buf->buffer,
5552 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5553 if (ret < 0)
5554 break;
5555 per_cpu_ptr(trace_buf->data, cpu)->entries =
5556 per_cpu_ptr(size_buf->data, cpu)->entries;
5557 }
5558 } else {
5559 ret = ring_buffer_resize(trace_buf->buffer,
5560 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5561 if (ret == 0)
5562 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5563 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5564 }
5565
5566 return ret;
5567 }
5568 #endif /* CONFIG_TRACER_MAX_TRACE */
5569
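/*
 * Resize @tr's main ring buffer (and, if the current tracer uses it,
 * the max/snapshot buffer as well) for @cpu, or for every CPU when
 * @cpu is RING_BUFFER_ALL_CPUS. If resizing the max buffer fails, the
 * main buffer is put back to its previous per-CPU sizes so the two
 * stay in sync; if even that fails, tracing is disabled entirely.
 */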
5570 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5571 unsigned long size, int cpu)
5572 {
5573 int ret;
5574
5575 /*
5576 * If kernel or user changes the size of the ring buffer
5577 * we use the size that was given, and we can forget about
5578 * expanding it later.
5579 */
5580 ring_buffer_expanded = true;
5581
5582 /* May be called before buffers are initialized */
5583 if (!tr->trace_buffer.buffer)
5584 return 0;
5585
5586 ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5587 if (ret < 0)
5588 return ret;
5589
5590 #ifdef CONFIG_TRACER_MAX_TRACE
5591 if (!tr->current_trace->use_max_tr)
5592 goto out;
5593
5594 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5595 if (ret < 0) {
5596 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5597 &tr->trace_buffer, cpu);
5598 if (r < 0) {
5599 /*
5600 * AARGH! We are left with different
5601 * size max buffer!!!!
5602 * The max buffer is our "snapshot" buffer.
5603 * When a tracer needs a snapshot (one of the
5604 * latency tracers), it swaps the max buffer
5605 * with the saved snapshot. We succeeded in
5606 * updating the size of the main buffer, but failed to
5607 * update the size of the max buffer. But when we tried
5608 * to reset the main buffer to the original size, we
5609 * failed there too. This is very unlikely to
5610 * happen, but if it does, warn and kill all
5611 * tracing.
5612 */
5613 WARN_ON(1);
5614 tracing_disabled = 1;
5615 }
5616 return ret;
5617 }
5618
5619 if (cpu == RING_BUFFER_ALL_CPUS)
5620 set_buffer_entries(&tr->max_buffer, size);
5621 else
5622 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5623
5624 out:
5625 #endif /* CONFIG_TRACER_MAX_TRACE */
5626
5627 if (cpu == RING_BUFFER_ALL_CPUS)
5628 set_buffer_entries(&tr->trace_buffer, size);
5629 else
5630 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5631
5632 return ret;
5633 }
5634
5635 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5636 unsigned long size, int cpu_id)
5637 {
5638 int ret = size;
5639
5640 mutex_lock(&trace_types_lock);
5641
5642 if (cpu_id != RING_BUFFER_ALL_CPUS) {
5643 /* make sure this cpu is enabled in the mask */
5644 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5645 ret = -EINVAL;
5646 goto out;
5647 }
5648 }
5649
5650 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5651 if (ret < 0)
5652 ret = -ENOMEM;
5653
5654 out:
5655 mutex_unlock(&trace_types_lock);
5656
5657 return ret;
5658 }
5659
5660
5661 /**
5662 * tracing_update_buffers - used by tracing facility to expand ring buffers
5663 *
5664 * To save memory when tracing is never used on a system that has it
5665 * configured in, the ring buffers are set to a minimum size. But once
5666 * a user starts to use the tracing facility, they need to grow
5667 * to their default size.
5668 *
5669 * This function is to be called when a tracer is about to be used.
5670 */
5671 int tracing_update_buffers(void)
5672 {
5673 int ret = 0;
5674
5675 mutex_lock(&trace_types_lock);
5676 if (!ring_buffer_expanded)
5677 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5678 RING_BUFFER_ALL_CPUS);
5679 mutex_unlock(&trace_types_lock);
5680
5681 return ret;
5682 }
5683
5684 struct trace_option_dentry;
5685
5686 static void
5687 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5688
5689 /*
5690 * Used to clear out the tracer before deletion of an instance.
5691 * Must have trace_types_lock held.
5692 */
5693 static void tracing_set_nop(struct trace_array *tr)
5694 {
5695 if (tr->current_trace == &nop_trace)
5696 return;
5697
5698 tr->current_trace->enabled--;
5699
5700 if (tr->current_trace->reset)
5701 tr->current_trace->reset(tr);
5702
5703 tr->current_trace = &nop_trace;
5704 }
5705
5706 static bool tracer_options_updated;
5707
5708 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5709 {
5710 /* Only enable if the directory has been created already. */
5711 if (!tr->dir)
5712 return;
5713
5714 /* Only create trace option files after update_tracer_options finishes */
5715 if (!tracer_options_updated)
5716 return;
5717
5718 create_trace_option_files(tr, t);
5719 }
5720
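/*
 * Switch @tr's current tracer to the one named @buf: expand the ring
 * buffer if it is still at its boot-time minimum, look the name up in
 * trace_types, run the sanity checks (noboot, per-instance, busy pipe
 * readers), tear down the old tracer through nop_trace, adjust the
 * snapshot buffer to what the new tracer needs, and finally call the
 * new tracer's init().
 */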
5721 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5722 {
5723 struct tracer *t;
5724 #ifdef CONFIG_TRACER_MAX_TRACE
5725 bool had_max_tr;
5726 #endif
5727 int ret = 0;
5728
5729 mutex_lock(&trace_types_lock);
5730
5731 if (!ring_buffer_expanded) {
5732 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5733 RING_BUFFER_ALL_CPUS);
5734 if (ret < 0)
5735 goto out;
5736 ret = 0;
5737 }
5738
5739 for (t = trace_types; t; t = t->next) {
5740 if (strcmp(t->name, buf) == 0)
5741 break;
5742 }
5743 if (!t) {
5744 ret = -EINVAL;
5745 goto out;
5746 }
5747 if (t == tr->current_trace)
5748 goto out;
5749
5750 #ifdef CONFIG_TRACER_SNAPSHOT
5751 if (t->use_max_tr) {
5752 local_irq_disable();
5753 arch_spin_lock(&tr->max_lock);
5754 if (tr->cond_snapshot)
5755 ret = -EBUSY;
5756 arch_spin_unlock(&tr->max_lock);
5757 local_irq_enable();
5758 if (ret)
5759 goto out;
5760 }
5761 #endif
5762 /* Some tracers won't work on kernel command line */
5763 if (system_state < SYSTEM_RUNNING && t->noboot) {
5764 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5765 t->name);
5766 goto out;
5767 }
5768
5769 /* Some tracers are only allowed for the top level buffer */
5770 if (!trace_ok_for_array(t, tr)) {
5771 ret = -EINVAL;
5772 goto out;
5773 }
5774
5775 /* If trace pipe files are being read, we can't change the tracer */
5776 if (tr->current_trace->ref) {
5777 ret = -EBUSY;
5778 goto out;
5779 }
5780
5781 trace_branch_disable();
5782
5783 tr->current_trace->enabled--;
5784
5785 if (tr->current_trace->reset)
5786 tr->current_trace->reset(tr);
5787
5788 /* Current trace needs to be nop_trace before synchronize_rcu */
5789 tr->current_trace = &nop_trace;
5790
5791 #ifdef CONFIG_TRACER_MAX_TRACE
5792 had_max_tr = tr->allocated_snapshot;
5793
5794 if (had_max_tr && !t->use_max_tr) {
5795 /*
5796 * We need to make sure that the update_max_tr sees that
5797 * current_trace changed to nop_trace to keep it from
5798 * swapping the buffers after we resize it.
5799 * update_max_tr() is called with interrupts disabled,
5800 * so a synchronize_rcu() is sufficient.
5801 */
5802 synchronize_rcu();
5803 free_snapshot(tr);
5804 }
5805 #endif
5806
5807 #ifdef CONFIG_TRACER_MAX_TRACE
5808 if (t->use_max_tr && !had_max_tr) {
5809 ret = tracing_alloc_snapshot_instance(tr);
5810 if (ret < 0)
5811 goto out;
5812 }
5813 #endif
5814
5815 if (t->init) {
5816 ret = tracer_init(t, tr);
5817 if (ret)
5818 goto out;
5819 }
5820
5821 tr->current_trace = t;
5822 tr->current_trace->enabled++;
5823 trace_branch_enable(tr);
5824 out:
5825 mutex_unlock(&trace_types_lock);
5826
5827 return ret;
5828 }
5829
5830 static ssize_t
5831 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5832 size_t cnt, loff_t *ppos)
5833 {
5834 struct trace_array *tr = filp->private_data;
5835 char buf[MAX_TRACER_SIZE+1];
5836 int i;
5837 size_t ret;
5838 int err;
5839
5840 ret = cnt;
5841
5842 if (cnt > MAX_TRACER_SIZE)
5843 cnt = MAX_TRACER_SIZE;
5844
5845 if (copy_from_user(buf, ubuf, cnt))
5846 return -EFAULT;
5847
5848 buf[cnt] = 0;
5849
5850 /* strip ending whitespace. */
5851 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5852 buf[i] = 0;
5853
5854 err = tracing_set_tracer(tr, buf);
5855 if (err)
5856 return err;
5857
5858 *ppos += ret;
5859
5860 return ret;
5861 }
5862
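/*
 * The latency files store their values in nanoseconds internally but
 * are read and written in microseconds: tracing_nsecs_write()
 * multiplies by 1000 and tracing_nsecs_read() converts back with
 * nsecs_to_usecs(). A stored value of (unsigned long)-1 is shown
 * as "-1".
 */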
5863 static ssize_t
5864 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5865 size_t cnt, loff_t *ppos)
5866 {
5867 char buf[64];
5868 int r;
5869
5870 r = snprintf(buf, sizeof(buf), "%ld\n",
5871 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5872 if (r > sizeof(buf))
5873 r = sizeof(buf);
5874 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5875 }
5876
5877 static ssize_t
5878 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5879 size_t cnt, loff_t *ppos)
5880 {
5881 unsigned long val;
5882 int ret;
5883
5884 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5885 if (ret)
5886 return ret;
5887
5888 *ptr = val * 1000;
5889
5890 return cnt;
5891 }
5892
5893 static ssize_t
5894 tracing_thresh_read(struct file *filp, char __user *ubuf,
5895 size_t cnt, loff_t *ppos)
5896 {
5897 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5898 }
5899
5900 static ssize_t
5901 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5902 size_t cnt, loff_t *ppos)
5903 {
5904 struct trace_array *tr = filp->private_data;
5905 int ret;
5906
5907 mutex_lock(&trace_types_lock);
5908 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5909 if (ret < 0)
5910 goto out;
5911
5912 if (tr->current_trace->update_thresh) {
5913 ret = tr->current_trace->update_thresh(tr);
5914 if (ret < 0)
5915 goto out;
5916 }
5917
5918 ret = cnt;
5919 out:
5920 mutex_unlock(&trace_types_lock);
5921
5922 return ret;
5923 }
5924
5925 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5926
5927 static ssize_t
5928 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5929 size_t cnt, loff_t *ppos)
5930 {
5931 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5932 }
5933
5934 static ssize_t
5935 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5936 size_t cnt, loff_t *ppos)
5937 {
5938 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5939 }
5940
5941 #endif
5942
5943 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5944 {
5945 struct trace_array *tr = inode->i_private;
5946 struct trace_iterator *iter;
5947 int ret;
5948
5949 ret = tracing_check_open_get_tr(tr);
5950 if (ret)
5951 return ret;
5952
5953 mutex_lock(&trace_types_lock);
5954
5955 /* create a buffer to store the information to pass to userspace */
5956 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5957 if (!iter) {
5958 ret = -ENOMEM;
5959 __trace_array_put(tr);
5960 goto out;
5961 }
5962
5963 trace_seq_init(&iter->seq);
5964 iter->trace = tr->current_trace;
5965
5966 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5967 ret = -ENOMEM;
5968 goto fail;
5969 }
5970
5971 /* trace pipe does not show start of buffer */
5972 cpumask_setall(iter->started);
5973
5974 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5975 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5976
5977 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5978 if (trace_clocks[tr->clock_id].in_ns)
5979 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5980
5981 iter->tr = tr;
5982 iter->trace_buffer = &tr->trace_buffer;
5983 iter->cpu_file = tracing_get_cpu(inode);
5984 mutex_init(&iter->mutex);
5985 filp->private_data = iter;
5986
5987 if (iter->trace->pipe_open)
5988 iter->trace->pipe_open(iter);
5989
5990 nonseekable_open(inode, filp);
5991
5992 tr->current_trace->ref++;
5993 out:
5994 mutex_unlock(&trace_types_lock);
5995 return ret;
5996
5997 fail:
5998 kfree(iter);
5999 __trace_array_put(tr);
6000 mutex_unlock(&trace_types_lock);
6001 return ret;
6002 }
6003
6004 static int tracing_release_pipe(struct inode *inode, struct file *file)
6005 {
6006 struct trace_iterator *iter = file->private_data;
6007 struct trace_array *tr = inode->i_private;
6008
6009 mutex_lock(&trace_types_lock);
6010
6011 tr->current_trace->ref--;
6012
6013 if (iter->trace->pipe_close)
6014 iter->trace->pipe_close(iter);
6015
6016 mutex_unlock(&trace_types_lock);
6017
6018 free_cpumask_var(iter->started);
6019 mutex_destroy(&iter->mutex);
6020 kfree(iter);
6021
6022 trace_array_put(tr);
6023
6024 return 0;
6025 }
6026
6027 static __poll_t
6028 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6029 {
6030 struct trace_array *tr = iter->tr;
6031
6032 /* Iterators are static; they should be filled or empty */
6033 if (trace_buffer_iter(iter, iter->cpu_file))
6034 return EPOLLIN | EPOLLRDNORM;
6035
6036 if (tr->trace_flags & TRACE_ITER_BLOCK)
6037 /*
6038 * Always select as readable when in blocking mode
6039 */
6040 return EPOLLIN | EPOLLRDNORM;
6041 else
6042 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
6043 filp, poll_table);
6044 }
6045
6046 static __poll_t
6047 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6048 {
6049 struct trace_iterator *iter = filp->private_data;
6050
6051 return trace_poll(iter, filp, poll_table);
6052 }
6053
6054 /* Must be called with iter->mutex held. */
6055 static int tracing_wait_pipe(struct file *filp)
6056 {
6057 struct trace_iterator *iter = filp->private_data;
6058 int ret;
6059
6060 while (trace_empty(iter)) {
6061
6062 if ((filp->f_flags & O_NONBLOCK)) {
6063 return -EAGAIN;
6064 }
6065
6066 /*
6067 * We block while the buffer is empty and only give an EOF
6068 * after tracing has been disabled and we have already read
6069 * something. We still block if tracing is disabled but we
6070 * have never read anything: this allows a user to cat this
6071 * file, and then enable tracing afterwards.
6072 *
6073 * iter->pos will be 0 if we haven't read anything.
6074 */
6075 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6076 break;
6077
6078 mutex_unlock(&iter->mutex);
6079
6080 ret = wait_on_pipe(iter, 0);
6081
6082 mutex_lock(&iter->mutex);
6083
6084 if (ret)
6085 return ret;
6086 }
6087
6088 return 1;
6089 }
6090
6091 /*
6092 * Consumer reader.
6093 */
6094 static ssize_t
6095 tracing_read_pipe(struct file *filp, char __user *ubuf,
6096 size_t cnt, loff_t *ppos)
6097 {
6098 struct trace_iterator *iter = filp->private_data;
6099 ssize_t sret;
6100
6101 /*
6102 * Avoid more than one consumer on a single file descriptor.
6103 * This is just a matter of trace coherency; the ring buffer itself
6104 * is protected.
6105 */
6106 mutex_lock(&iter->mutex);
6107
6108 /* return any leftover data */
6109 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6110 if (sret != -EBUSY)
6111 goto out;
6112
6113 trace_seq_init(&iter->seq);
6114
6115 if (iter->trace->read) {
6116 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6117 if (sret)
6118 goto out;
6119 }
6120
6121 waitagain:
6122 sret = tracing_wait_pipe(filp);
6123 if (sret <= 0)
6124 goto out;
6125
6126 /* stop when tracing is finished */
6127 if (trace_empty(iter)) {
6128 sret = 0;
6129 goto out;
6130 }
6131
6132 if (cnt >= PAGE_SIZE)
6133 cnt = PAGE_SIZE - 1;
6134
6135 /* reset all but tr, trace, and overruns */
6136 memset(&iter->seq, 0,
6137 sizeof(struct trace_iterator) -
6138 offsetof(struct trace_iterator, seq));
6139 cpumask_clear(iter->started);
6140 trace_seq_init(&iter->seq);
6141 iter->pos = -1;
6142
6143 trace_event_read_lock();
6144 trace_access_lock(iter->cpu_file);
6145 while (trace_find_next_entry_inc(iter) != NULL) {
6146 enum print_line_t ret;
6147 int save_len = iter->seq.seq.len;
6148
6149 ret = print_trace_line(iter);
6150 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6151 /*
6152 * If one print_trace_line() fills the entire trace_seq in one shot,
6153 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6154 * In this case, we need to consume it; otherwise, the loop will peek
6155 * at this event next time, resulting in an infinite loop.
6156 */
6157 if (save_len == 0) {
6158 iter->seq.full = 0;
6159 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6160 trace_consume(iter);
6161 break;
6162 }
6163
6164 /* In other cases, don't print partial lines */
6165 iter->seq.seq.len = save_len;
6166 break;
6167 }
6168 if (ret != TRACE_TYPE_NO_CONSUME)
6169 trace_consume(iter);
6170
6171 if (trace_seq_used(&iter->seq) >= cnt)
6172 break;
6173
6174 /*
6175 * Setting the full flag means we reached the trace_seq buffer
6176 * size and we should leave by partial output condition above.
6177 * One of the trace_seq_* functions is not used properly.
6178 */
6179 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6180 iter->ent->type);
6181 }
6182 trace_access_unlock(iter->cpu_file);
6183 trace_event_read_unlock();
6184
6185 /* Now copy what we have to the user */
6186 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6187 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6188 trace_seq_init(&iter->seq);
6189
6190 /*
6191 * If there was nothing to send to user, in spite of consuming trace
6192 * entries, go back to wait for more entries.
6193 */
6194 if (sret == -EBUSY)
6195 goto waitagain;
6196
6197 out:
6198 mutex_unlock(&iter->mutex);
6199
6200 return sret;
6201 }
6202
6203 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6204 unsigned int idx)
6205 {
6206 __free_page(spd->pages[idx]);
6207 }
6208
6209 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6210 .confirm = generic_pipe_buf_confirm,
6211 .release = generic_pipe_buf_release,
6212 .steal = generic_pipe_buf_steal,
6213 .get = generic_pipe_buf_get,
6214 };
6215
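/*
 * Fill iter->seq with up to @rem bytes of formatted trace output for
 * one splice page, consuming entries as they are printed. Returns how
 * much of @rem is left; 0 means either the byte budget or the trace
 * buffer ran out.
 */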
6216 static size_t
6217 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6218 {
6219 size_t count;
6220 int save_len;
6221 int ret;
6222
6223 /* Seq buffer is page-sized, exactly what we need. */
6224 for (;;) {
6225 save_len = iter->seq.seq.len;
6226 ret = print_trace_line(iter);
6227
6228 if (trace_seq_has_overflowed(&iter->seq)) {
6229 iter->seq.seq.len = save_len;
6230 break;
6231 }
6232
6233 /*
6234 * This should not be hit, because it should only
6235 * be set if the iter->seq overflowed. But check it
6236 * anyway to be safe.
6237 */
6238 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6239 iter->seq.seq.len = save_len;
6240 break;
6241 }
6242
6243 count = trace_seq_used(&iter->seq) - save_len;
6244 if (rem < count) {
6245 rem = 0;
6246 iter->seq.seq.len = save_len;
6247 break;
6248 }
6249
6250 if (ret != TRACE_TYPE_NO_CONSUME)
6251 trace_consume(iter);
6252 rem -= count;
6253 if (!trace_find_next_entry_inc(iter)) {
6254 rem = 0;
6255 iter->ent = NULL;
6256 break;
6257 }
6258 }
6259
6260 return rem;
6261 }
6262
6263 static ssize_t tracing_splice_read_pipe(struct file *filp,
6264 loff_t *ppos,
6265 struct pipe_inode_info *pipe,
6266 size_t len,
6267 unsigned int flags)
6268 {
6269 struct page *pages_def[PIPE_DEF_BUFFERS];
6270 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6271 struct trace_iterator *iter = filp->private_data;
6272 struct splice_pipe_desc spd = {
6273 .pages = pages_def,
6274 .partial = partial_def,
6275 .nr_pages = 0, /* This gets updated below. */
6276 .nr_pages_max = PIPE_DEF_BUFFERS,
6277 .ops = &tracing_pipe_buf_ops,
6278 .spd_release = tracing_spd_release_pipe,
6279 };
6280 ssize_t ret;
6281 size_t rem;
6282 unsigned int i;
6283
6284 if (splice_grow_spd(pipe, &spd))
6285 return -ENOMEM;
6286
6287 mutex_lock(&iter->mutex);
6288
6289 if (iter->trace->splice_read) {
6290 ret = iter->trace->splice_read(iter, filp,
6291 ppos, pipe, len, flags);
6292 if (ret)
6293 goto out_err;
6294 }
6295
6296 ret = tracing_wait_pipe(filp);
6297 if (ret <= 0)
6298 goto out_err;
6299
6300 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6301 ret = -EFAULT;
6302 goto out_err;
6303 }
6304
6305 trace_event_read_lock();
6306 trace_access_lock(iter->cpu_file);
6307
6308 /* Fill as many pages as possible. */
6309 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6310 spd.pages[i] = alloc_page(GFP_KERNEL);
6311 if (!spd.pages[i])
6312 break;
6313
6314 rem = tracing_fill_pipe_page(rem, iter);
6315
6316 /* Copy the data into the page, so we can start over. */
6317 ret = trace_seq_to_buffer(&iter->seq,
6318 page_address(spd.pages[i]),
6319 trace_seq_used(&iter->seq));
6320 if (ret < 0) {
6321 __free_page(spd.pages[i]);
6322 break;
6323 }
6324 spd.partial[i].offset = 0;
6325 spd.partial[i].len = trace_seq_used(&iter->seq);
6326
6327 trace_seq_init(&iter->seq);
6328 }
6329
6330 trace_access_unlock(iter->cpu_file);
6331 trace_event_read_unlock();
6332 mutex_unlock(&iter->mutex);
6333
6334 spd.nr_pages = i;
6335
6336 if (i)
6337 ret = splice_to_pipe(pipe, &spd);
6338 else
6339 ret = 0;
6340 out:
6341 splice_shrink_spd(&spd);
6342 return ret;
6343
6344 out_err:
6345 mutex_unlock(&iter->mutex);
6346 goto out;
6347 }
6348
6349 static ssize_t
6350 tracing_entries_read(struct file *filp, char __user *ubuf,
6351 size_t cnt, loff_t *ppos)
6352 {
6353 struct inode *inode = file_inode(filp);
6354 struct trace_array *tr = inode->i_private;
6355 int cpu = tracing_get_cpu(inode);
6356 char buf[64];
6357 int r = 0;
6358 ssize_t ret;
6359
6360 mutex_lock(&trace_types_lock);
6361
6362 if (cpu == RING_BUFFER_ALL_CPUS) {
6363 int cpu, buf_size_same;
6364 unsigned long size;
6365
6366 size = 0;
6367 buf_size_same = 1;
6368 /* check if all cpu sizes are same */
6369 for_each_tracing_cpu(cpu) {
6370 /* fill in the size from first enabled cpu */
6371 if (size == 0)
6372 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6373 if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6374 buf_size_same = 0;
6375 break;
6376 }
6377 }
6378
6379 if (buf_size_same) {
6380 if (!ring_buffer_expanded)
6381 r = sprintf(buf, "%lu (expanded: %lu)\n",
6382 size >> 10,
6383 trace_buf_size >> 10);
6384 else
6385 r = sprintf(buf, "%lu\n", size >> 10);
6386 } else
6387 r = sprintf(buf, "X\n");
6388 } else
6389 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6390
6391 mutex_unlock(&trace_types_lock);
6392
6393 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6394 return ret;
6395 }
6396
6397 static ssize_t
6398 tracing_entries_write(struct file *filp, const char __user *ubuf,
6399 size_t cnt, loff_t *ppos)
6400 {
6401 struct inode *inode = file_inode(filp);
6402 struct trace_array *tr = inode->i_private;
6403 unsigned long val;
6404 int ret;
6405
6406 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6407 if (ret)
6408 return ret;
6409
6410 /* must have at least 1 entry */
6411 if (!val)
6412 return -EINVAL;
6413
6414 /* value is in KB */
6415 val <<= 10;
6416 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6417 if (ret < 0)
6418 return ret;
6419
6420 *ppos += cnt;
6421
6422 return cnt;
6423 }
6424
6425 static ssize_t
6426 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6427 size_t cnt, loff_t *ppos)
6428 {
6429 struct trace_array *tr = filp->private_data;
6430 char buf[64];
6431 int r, cpu;
6432 unsigned long size = 0, expanded_size = 0;
6433
6434 mutex_lock(&trace_types_lock);
6435 for_each_tracing_cpu(cpu) {
6436 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6437 if (!ring_buffer_expanded)
6438 expanded_size += trace_buf_size >> 10;
6439 }
6440 if (ring_buffer_expanded)
6441 r = sprintf(buf, "%lu\n", size);
6442 else
6443 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6444 mutex_unlock(&trace_types_lock);
6445
6446 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6447 }
6448
6449 static ssize_t
6450 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6451 size_t cnt, loff_t *ppos)
6452 {
6453 /*
6454 * There is no need to read what the user has written, this function
6455 * is just to make sure that there is no error when "echo" is used
6456 */
6457
6458 *ppos += cnt;
6459
6460 return cnt;
6461 }
6462
6463 static int
6464 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6465 {
6466 struct trace_array *tr = inode->i_private;
6467
6468 /* disable tracing ? */
6469 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6470 tracer_tracing_off(tr);
6471 /* resize the ring buffer to 0 */
6472 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6473
6474 trace_array_put(tr);
6475
6476 return 0;
6477 }
6478
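/*
 * Handler for writes to the trace_marker file. The user string is
 * copied straight into a TRACE_PRINT event in the ring buffer (with a
 * trailing newline added if missing), so userspace can annotate a
 * trace, e.g.:
 *
 *	echo "hello world" > /sys/kernel/tracing/trace_marker
 *
 * (the path assumes tracefs is mounted in its usual location)
 */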
6479 static ssize_t
6480 tracing_mark_write(struct file *filp, const char __user *ubuf,
6481 size_t cnt, loff_t *fpos)
6482 {
6483 struct trace_array *tr = filp->private_data;
6484 struct ring_buffer_event *event;
6485 enum event_trigger_type tt = ETT_NONE;
6486 struct ring_buffer *buffer;
6487 struct print_entry *entry;
6488 unsigned long irq_flags;
6489 ssize_t written;
6490 int size;
6491 int len;
6492
6493 /* Used in tracing_mark_raw_write() as well */
6494 #define FAULTED_STR "<faulted>"
6495 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6496
6497 if (tracing_disabled)
6498 return -EINVAL;
6499
6500 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6501 return -EINVAL;
6502
6503 if (cnt > TRACE_BUF_SIZE)
6504 cnt = TRACE_BUF_SIZE;
6505
6506 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6507
6508 local_save_flags(irq_flags);
6509 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6510
6511 /* If less than "<faulted>", then make sure we can still add that */
6512 if (cnt < FAULTED_SIZE)
6513 size += FAULTED_SIZE - cnt;
6514
6515 buffer = tr->trace_buffer.buffer;
6516 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6517 irq_flags, preempt_count());
6518 if (unlikely(!event))
6519 /* Ring buffer disabled, return as if not open for write */
6520 return -EBADF;
6521
6522 entry = ring_buffer_event_data(event);
6523 entry->ip = _THIS_IP_;
6524
6525 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6526 if (len) {
6527 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6528 cnt = FAULTED_SIZE;
6529 written = -EFAULT;
6530 } else
6531 written = cnt;
6532 len = cnt;
6533
6534 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6535 /* do not add \n before testing triggers, but add \0 */
6536 entry->buf[cnt] = '\0';
6537 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6538 }
6539
6540 if (entry->buf[cnt - 1] != '\n') {
6541 entry->buf[cnt] = '\n';
6542 entry->buf[cnt + 1] = '\0';
6543 } else
6544 entry->buf[cnt] = '\0';
6545
6546 __buffer_unlock_commit(buffer, event);
6547
6548 if (tt)
6549 event_triggers_post_call(tr->trace_marker_file, tt);
6550
6551 if (written > 0)
6552 *fpos += written;
6553
6554 return written;
6555 }
6556
6557 /* Limit it for now to 3K (including tag) */
6558 #define RAW_DATA_MAX_SIZE (1024*3)
6559
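/*
 * Binary counterpart of tracing_mark_write() for the trace_marker_raw
 * file: the payload must start with a 4-byte tag id and is stored
 * verbatim in a TRACE_RAW_DATA event, up to RAW_DATA_MAX_SIZE bytes.
 */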
6560 static ssize_t
6561 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6562 size_t cnt, loff_t *fpos)
6563 {
6564 struct trace_array *tr = filp->private_data;
6565 struct ring_buffer_event *event;
6566 struct ring_buffer *buffer;
6567 struct raw_data_entry *entry;
6568 unsigned long irq_flags;
6569 ssize_t written;
6570 int size;
6571 int len;
6572
6573 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6574
6575 if (tracing_disabled)
6576 return -EINVAL;
6577
6578 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6579 return -EINVAL;
6580
6581 /* The marker must at least have a tag id */
6582 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6583 return -EINVAL;
6584
6585 if (cnt > TRACE_BUF_SIZE)
6586 cnt = TRACE_BUF_SIZE;
6587
6588 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6589
6590 local_save_flags(irq_flags);
6591 size = sizeof(*entry) + cnt;
6592 if (cnt < FAULT_SIZE_ID)
6593 size += FAULT_SIZE_ID - cnt;
6594
6595 buffer = tr->trace_buffer.buffer;
6596 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6597 irq_flags, preempt_count());
6598 if (!event)
6599 /* Ring buffer disabled, return as if not open for write */
6600 return -EBADF;
6601
6602 entry = ring_buffer_event_data(event);
6603
6604 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6605 if (len) {
6606 entry->id = -1;
6607 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6608 written = -EFAULT;
6609 } else
6610 written = cnt;
6611
6612 __buffer_unlock_commit(buffer, event);
6613
6614 if (written > 0)
6615 *fpos += written;
6616
6617 return written;
6618 }
6619
6620 static int tracing_clock_show(struct seq_file *m, void *v)
6621 {
6622 struct trace_array *tr = m->private;
6623 int i;
6624
6625 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6626 seq_printf(m,
6627 "%s%s%s%s", i ? " " : "",
6628 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6629 i == tr->clock_id ? "]" : "");
6630 seq_putc(m, '\n');
6631
6632 return 0;
6633 }
6634
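/*
 * Set the trace clock of @tr to the clock named @clockstr. Both the
 * main buffer and (if allocated) the max buffer are switched, and both
 * are reset, since timestamps from different clocks cannot be compared.
 */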
6635 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6636 {
6637 int i;
6638
6639 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6640 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6641 break;
6642 }
6643 if (i == ARRAY_SIZE(trace_clocks))
6644 return -EINVAL;
6645
6646 mutex_lock(&trace_types_lock);
6647
6648 tr->clock_id = i;
6649
6650 ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6651
6652 /*
6653 * New clock may not be consistent with the previous clock.
6654 * Reset the buffer so that it doesn't have incomparable timestamps.
6655 */
6656 tracing_reset_online_cpus(&tr->trace_buffer);
6657
6658 #ifdef CONFIG_TRACER_MAX_TRACE
6659 if (tr->max_buffer.buffer)
6660 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6661 tracing_reset_online_cpus(&tr->max_buffer);
6662 #endif
6663
6664 mutex_unlock(&trace_types_lock);
6665
6666 return 0;
6667 }
6668
6669 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6670 size_t cnt, loff_t *fpos)
6671 {
6672 struct seq_file *m = filp->private_data;
6673 struct trace_array *tr = m->private;
6674 char buf[64];
6675 const char *clockstr;
6676 int ret;
6677
6678 if (cnt >= sizeof(buf))
6679 return -EINVAL;
6680
6681 if (copy_from_user(buf, ubuf, cnt))
6682 return -EFAULT;
6683
6684 buf[cnt] = 0;
6685
6686 clockstr = strstrip(buf);
6687
6688 ret = tracing_set_clock(tr, clockstr);
6689 if (ret)
6690 return ret;
6691
6692 *fpos += cnt;
6693
6694 return cnt;
6695 }
6696
6697 static int tracing_clock_open(struct inode *inode, struct file *file)
6698 {
6699 struct trace_array *tr = inode->i_private;
6700 int ret;
6701
6702 ret = tracing_check_open_get_tr(tr);
6703 if (ret)
6704 return ret;
6705
6706 ret = single_open(file, tracing_clock_show, inode->i_private);
6707 if (ret < 0)
6708 trace_array_put(tr);
6709
6710 return ret;
6711 }
6712
6713 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6714 {
6715 struct trace_array *tr = m->private;
6716
6717 mutex_lock(&trace_types_lock);
6718
6719 if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6720 seq_puts(m, "delta [absolute]\n");
6721 else
6722 seq_puts(m, "[delta] absolute\n");
6723
6724 mutex_unlock(&trace_types_lock);
6725
6726 return 0;
6727 }
6728
6729 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6730 {
6731 struct trace_array *tr = inode->i_private;
6732 int ret;
6733
6734 ret = tracing_check_open_get_tr(tr);
6735 if (ret)
6736 return ret;
6737
6738 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6739 if (ret < 0)
6740 trace_array_put(tr);
6741
6742 return ret;
6743 }
6744
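/*
 * Enable or disable absolute timestamps on @tr's buffers. The setting
 * is refcounted so that nested users (histogram triggers, for
 * instance) can each request absolute timestamps without stepping on
 * one another.
 */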
6745 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6746 {
6747 int ret = 0;
6748
6749 mutex_lock(&trace_types_lock);
6750
6751 if (abs && tr->time_stamp_abs_ref++)
6752 goto out;
6753
6754 if (!abs) {
6755 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6756 ret = -EINVAL;
6757 goto out;
6758 }
6759
6760 if (--tr->time_stamp_abs_ref)
6761 goto out;
6762 }
6763
6764 ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6765
6766 #ifdef CONFIG_TRACER_MAX_TRACE
6767 if (tr->max_buffer.buffer)
6768 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6769 #endif
6770 out:
6771 mutex_unlock(&trace_types_lock);
6772
6773 return ret;
6774 }
6775
6776 struct ftrace_buffer_info {
6777 struct trace_iterator iter;
6778 void *spare;
6779 unsigned int spare_cpu;
6780 unsigned int read;
6781 };
6782
6783 #ifdef CONFIG_TRACER_SNAPSHOT
6784 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6785 {
6786 struct trace_array *tr = inode->i_private;
6787 struct trace_iterator *iter;
6788 struct seq_file *m;
6789 int ret;
6790
6791 ret = tracing_check_open_get_tr(tr);
6792 if (ret)
6793 return ret;
6794
6795 if (file->f_mode & FMODE_READ) {
6796 iter = __tracing_open(inode, file, true);
6797 if (IS_ERR(iter))
6798 ret = PTR_ERR(iter);
6799 } else {
6800 /* Writes still need the seq_file to hold the private data */
6801 ret = -ENOMEM;
6802 m = kzalloc(sizeof(*m), GFP_KERNEL);
6803 if (!m)
6804 goto out;
6805 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6806 if (!iter) {
6807 kfree(m);
6808 goto out;
6809 }
6810 ret = 0;
6811
6812 iter->tr = tr;
6813 iter->trace_buffer = &tr->max_buffer;
6814 iter->cpu_file = tracing_get_cpu(inode);
6815 m->private = iter;
6816 file->private_data = m;
6817 }
6818 out:
6819 if (ret < 0)
6820 trace_array_put(tr);
6821
6822 return ret;
6823 }
6824
6825 static void tracing_swap_cpu_buffer(void *tr)
6826 {
6827 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
6828 }
6829
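/*
 * Writes to the snapshot file control the snapshot (max) buffer:
 *   0 - free the snapshot buffer (only valid for the all-CPUs file)
 *   1 - allocate the snapshot buffer if needed and swap it with the
 *       live buffer (per-CPU swap only if the ring buffer supports it)
 *  anything else - clear the snapshot buffer's contents without swapping
 *
 * e.g. "echo 1 > snapshot" in the tracefs directory takes a snapshot.
 */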
6830 static ssize_t
6831 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6832 loff_t *ppos)
6833 {
6834 struct seq_file *m = filp->private_data;
6835 struct trace_iterator *iter = m->private;
6836 struct trace_array *tr = iter->tr;
6837 unsigned long val;
6838 int ret;
6839
6840 ret = tracing_update_buffers();
6841 if (ret < 0)
6842 return ret;
6843
6844 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6845 if (ret)
6846 return ret;
6847
6848 mutex_lock(&trace_types_lock);
6849
6850 if (tr->current_trace->use_max_tr) {
6851 ret = -EBUSY;
6852 goto out;
6853 }
6854
6855 local_irq_disable();
6856 arch_spin_lock(&tr->max_lock);
6857 if (tr->cond_snapshot)
6858 ret = -EBUSY;
6859 arch_spin_unlock(&tr->max_lock);
6860 local_irq_enable();
6861 if (ret)
6862 goto out;
6863
6864 switch (val) {
6865 case 0:
6866 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6867 ret = -EINVAL;
6868 break;
6869 }
6870 if (tr->allocated_snapshot)
6871 free_snapshot(tr);
6872 break;
6873 case 1:
6874 /* Only allow per-cpu swap if the ring buffer supports it */
6875 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6876 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6877 ret = -EINVAL;
6878 break;
6879 }
6880 #endif
6881 if (tr->allocated_snapshot)
6882 ret = resize_buffer_duplicate_size(&tr->max_buffer,
6883 &tr->trace_buffer, iter->cpu_file);
6884 else
6885 ret = tracing_alloc_snapshot_instance(tr);
6886 if (ret < 0)
6887 break;
6888 /* Now, we're going to swap */
6889 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
6890 local_irq_disable();
6891 update_max_tr(tr, current, smp_processor_id(), NULL);
6892 local_irq_enable();
6893 } else {
6894 smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
6895 (void *)tr, 1);
6896 }
6897 break;
6898 default:
6899 if (tr->allocated_snapshot) {
6900 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6901 tracing_reset_online_cpus(&tr->max_buffer);
6902 else
6903 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6904 }
6905 break;
6906 }
6907
6908 if (ret >= 0) {
6909 *ppos += cnt;
6910 ret = cnt;
6911 }
6912 out:
6913 mutex_unlock(&trace_types_lock);
6914 return ret;
6915 }
6916
6917 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6918 {
6919 struct seq_file *m = file->private_data;
6920 int ret;
6921
6922 ret = tracing_release(inode, file);
6923
6924 if (file->f_mode & FMODE_READ)
6925 return ret;
6926
6927 /* If write only, the seq_file is just a stub */
6928 if (m)
6929 kfree(m->private);
6930 kfree(m);
6931
6932 return 0;
6933 }
6934
6935 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6936 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6937 size_t count, loff_t *ppos);
6938 static int tracing_buffers_release(struct inode *inode, struct file *file);
6939 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6940 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6941
6942 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6943 {
6944 struct ftrace_buffer_info *info;
6945 int ret;
6946
6947 /* The following checks for tracefs lockdown */
6948 ret = tracing_buffers_open(inode, filp);
6949 if (ret < 0)
6950 return ret;
6951
6952 info = filp->private_data;
6953
6954 if (info->iter.trace->use_max_tr) {
6955 tracing_buffers_release(inode, filp);
6956 return -EBUSY;
6957 }
6958
6959 info->iter.snapshot = true;
6960 info->iter.trace_buffer = &info->iter.tr->max_buffer;
6961
6962 return ret;
6963 }
6964
6965 #endif /* CONFIG_TRACER_SNAPSHOT */
6966
6967
6968 static const struct file_operations tracing_thresh_fops = {
6969 .open = tracing_open_generic,
6970 .read = tracing_thresh_read,
6971 .write = tracing_thresh_write,
6972 .llseek = generic_file_llseek,
6973 };
6974
6975 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6976 static const struct file_operations tracing_max_lat_fops = {
6977 .open = tracing_open_generic,
6978 .read = tracing_max_lat_read,
6979 .write = tracing_max_lat_write,
6980 .llseek = generic_file_llseek,
6981 };
6982 #endif
6983
6984 static const struct file_operations set_tracer_fops = {
6985 .open = tracing_open_generic_tr,
6986 .read = tracing_set_trace_read,
6987 .write = tracing_set_trace_write,
6988 .llseek = generic_file_llseek,
6989 .release = tracing_release_generic_tr,
6990 };
6991
6992 static const struct file_operations tracing_pipe_fops = {
6993 .open = tracing_open_pipe,
6994 .poll = tracing_poll_pipe,
6995 .read = tracing_read_pipe,
6996 .splice_read = tracing_splice_read_pipe,
6997 .release = tracing_release_pipe,
6998 .llseek = no_llseek,
6999 };
7000
7001 static const struct file_operations tracing_entries_fops = {
7002 .open = tracing_open_generic_tr,
7003 .read = tracing_entries_read,
7004 .write = tracing_entries_write,
7005 .llseek = generic_file_llseek,
7006 .release = tracing_release_generic_tr,
7007 };
7008
7009 static const struct file_operations tracing_total_entries_fops = {
7010 .open = tracing_open_generic_tr,
7011 .read = tracing_total_entries_read,
7012 .llseek = generic_file_llseek,
7013 .release = tracing_release_generic_tr,
7014 };
7015
7016 static const struct file_operations tracing_free_buffer_fops = {
7017 .open = tracing_open_generic_tr,
7018 .write = tracing_free_buffer_write,
7019 .release = tracing_free_buffer_release,
7020 };
7021
7022 static const struct file_operations tracing_mark_fops = {
7023 .open = tracing_open_generic_tr,
7024 .write = tracing_mark_write,
7025 .llseek = generic_file_llseek,
7026 .release = tracing_release_generic_tr,
7027 };
7028
7029 static const struct file_operations tracing_mark_raw_fops = {
7030 .open = tracing_open_generic_tr,
7031 .write = tracing_mark_raw_write,
7032 .llseek = generic_file_llseek,
7033 .release = tracing_release_generic_tr,
7034 };
7035
7036 static const struct file_operations trace_clock_fops = {
7037 .open = tracing_clock_open,
7038 .read = seq_read,
7039 .llseek = seq_lseek,
7040 .release = tracing_single_release_tr,
7041 .write = tracing_clock_write,
7042 };
7043
7044 static const struct file_operations trace_time_stamp_mode_fops = {
7045 .open = tracing_time_stamp_mode_open,
7046 .read = seq_read,
7047 .llseek = seq_lseek,
7048 .release = tracing_single_release_tr,
7049 };
7050
7051 #ifdef CONFIG_TRACER_SNAPSHOT
7052 static const struct file_operations snapshot_fops = {
7053 .open = tracing_snapshot_open,
7054 .read = seq_read,
7055 .write = tracing_snapshot_write,
7056 .llseek = tracing_lseek,
7057 .release = tracing_snapshot_release,
7058 };
7059
7060 static const struct file_operations snapshot_raw_fops = {
7061 .open = snapshot_raw_open,
7062 .read = tracing_buffers_read,
7063 .release = tracing_buffers_release,
7064 .splice_read = tracing_buffers_splice_read,
7065 .llseek = no_llseek,
7066 };
7067
7068 #endif /* CONFIG_TRACER_SNAPSHOT */
7069
7070 #define TRACING_LOG_ERRS_MAX 8
7071 #define TRACING_LOG_LOC_MAX 128
7072
7073 #define CMD_PREFIX " Command: "
7074
7075 struct err_info {
7076 const char **errs; /* ptr to loc-specific array of err strings */
7077 u8 type; /* index into errs -> specific err string */
7078 u8 pos; /* MAX_FILTER_STR_VAL = 256 */
7079 u64 ts;
7080 };
7081
7082 struct tracing_log_err {
7083 struct list_head list;
7084 struct err_info info;
7085 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7086 char cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7087 };
7088
7089 static DEFINE_MUTEX(tracing_err_log_lock);
7090
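/*
 * Get a slot for a new error-log entry: allocate a fresh one until
 * TRACING_LOG_ERRS_MAX entries exist, after that recycle the oldest
 * entry on tr->err_log. Called with tracing_err_log_lock held.
 */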
7091 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7092 {
7093 struct tracing_log_err *err;
7094
7095 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7096 err = kzalloc(sizeof(*err), GFP_KERNEL);
7097 if (!err)
7098 err = ERR_PTR(-ENOMEM);
7099 else
7100 tr->n_err_log_entries++;
7101
7102 return err;
7103 }
7104
7105 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7106 list_del(&err->list);
7107
7108 return err;
7109 }
7110
7111 /**
7112 * err_pos - find the position of a string within a command for error careting
7113 * @cmd: The tracing command that caused the error
7114 * @str: The string to position the caret at within @cmd
7115 *
7116 * Finds the position of the first occurrence of @str within @cmd. The
7117 * return value can be passed to tracing_log_err() for caret placement
7118 * within @cmd.
7119 *
7120 * Returns the index within @cmd of the first occurrence of @str or 0
7121 * if @str was not found.
7122 */
7123 unsigned int err_pos(char *cmd, const char *str)
7124 {
7125 char *found;
7126
7127 if (WARN_ON(!strlen(cmd)))
7128 return 0;
7129
7130 found = strstr(cmd, str);
7131 if (found)
7132 return found - cmd;
7133
7134 return 0;
7135 }
7136
7137 /**
7138 * tracing_log_err - write an error to the tracing error log
7139 * @tr: The associated trace array for the error (NULL for top level array)
7140 * @loc: A string describing where the error occurred
7141 * @cmd: The tracing command that caused the error
7142 * @errs: The array of loc-specific static error strings
7143 * @type: The index into errs[], which produces the specific static err string
7144 * @pos: The position the caret should be placed in the cmd
7145 *
7146 * Writes an error into tracing/error_log of the form:
7147 *
7148 * <loc>: error: <text>
7149 * Command: <cmd>
7150 * ^
7151 *
7152 * tracing/error_log is a small log file containing the last
7153 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7154 * unless there has been a tracing error, and the error log can be
7155 * cleared and have its memory freed by writing the empty string in
7156 * truncation mode to it i.e. echo > tracing/error_log.
7157 *
7158 * NOTE: the @errs array along with the @type param are used to
7159 * produce a static error string - this string is not copied and saved
7160 * when the error is logged - only a pointer to it is saved. See
7161 * existing callers for examples of how static strings are typically
7162 * defined for use with tracing_log_err().
7163 */
7164 void tracing_log_err(struct trace_array *tr,
7165 const char *loc, const char *cmd,
7166 const char **errs, u8 type, u8 pos)
7167 {
7168 struct tracing_log_err *err;
7169
7170 if (!tr)
7171 tr = &global_trace;
7172
7173 mutex_lock(&tracing_err_log_lock);
7174 err = get_tracing_log_err(tr);
7175 if (PTR_ERR(err) == -ENOMEM) {
7176 mutex_unlock(&tracing_err_log_lock);
7177 return;
7178 }
7179
7180 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7181 snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7182
7183 err->info.errs = errs;
7184 err->info.type = type;
7185 err->info.pos = pos;
7186 err->info.ts = local_clock();
7187
7188 list_add_tail(&err->list, &tr->err_log);
7189 mutex_unlock(&tracing_err_log_lock);
7190 }
7191
7192 static void clear_tracing_err_log(struct trace_array *tr)
7193 {
7194 struct tracing_log_err *err, *next;
7195
7196 mutex_lock(&tracing_err_log_lock);
7197 list_for_each_entry_safe(err, next, &tr->err_log, list) {
7198 list_del(&err->list);
7199 kfree(err);
7200 }
7201
7202 tr->n_err_log_entries = 0;
7203 mutex_unlock(&tracing_err_log_lock);
7204 }
7205
7206 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7207 {
7208 struct trace_array *tr = m->private;
7209
7210 mutex_lock(&tracing_err_log_lock);
7211
7212 return seq_list_start(&tr->err_log, *pos);
7213 }
7214
7215 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7216 {
7217 struct trace_array *tr = m->private;
7218
7219 return seq_list_next(v, &tr->err_log, pos);
7220 }
7221
7222 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7223 {
7224 mutex_unlock(&tracing_err_log_lock);
7225 }
7226
7227 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7228 {
7229 u8 i;
7230
7231 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7232 seq_putc(m, ' ');
7233 for (i = 0; i < pos; i++)
7234 seq_putc(m, ' ');
7235 seq_puts(m, "^\n");
7236 }
7237
7238 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7239 {
7240 struct tracing_log_err *err = v;
7241
7242 if (err) {
7243 const char *err_text = err->info.errs[err->info.type];
7244 u64 sec = err->info.ts;
7245 u32 nsec;
7246
7247 nsec = do_div(sec, NSEC_PER_SEC);
7248 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7249 err->loc, err_text);
7250 seq_printf(m, "%s", err->cmd);
7251 tracing_err_log_show_pos(m, err->info.pos);
7252 }
7253
7254 return 0;
7255 }
7256
7257 static const struct seq_operations tracing_err_log_seq_ops = {
7258 .start = tracing_err_log_seq_start,
7259 .next = tracing_err_log_seq_next,
7260 .stop = tracing_err_log_seq_stop,
7261 .show = tracing_err_log_seq_show
7262 };
7263
7264 static int tracing_err_log_open(struct inode *inode, struct file *file)
7265 {
7266 struct trace_array *tr = inode->i_private;
7267 int ret = 0;
7268
7269 ret = tracing_check_open_get_tr(tr);
7270 if (ret)
7271 return ret;
7272
7273 /* If this file was opened for write, then erase contents */
7274 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7275 clear_tracing_err_log(tr);
7276
7277 if (file->f_mode & FMODE_READ) {
7278 ret = seq_open(file, &tracing_err_log_seq_ops);
7279 if (!ret) {
7280 struct seq_file *m = file->private_data;
7281 m->private = tr;
7282 } else {
7283 trace_array_put(tr);
7284 }
7285 }
7286 return ret;
7287 }
7288
7289 static ssize_t tracing_err_log_write(struct file *file,
7290 const char __user *buffer,
7291 size_t count, loff_t *ppos)
7292 {
7293 return count;
7294 }
7295
7296 static int tracing_err_log_release(struct inode *inode, struct file *file)
7297 {
7298 struct trace_array *tr = inode->i_private;
7299
7300 trace_array_put(tr);
7301
7302 if (file->f_mode & FMODE_READ)
7303 seq_release(inode, file);
7304
7305 return 0;
7306 }
7307
7308 static const struct file_operations tracing_err_log_fops = {
7309 .open = tracing_err_log_open,
7310 .write = tracing_err_log_write,
7311 .read = seq_read,
7312 .llseek = tracing_lseek,
7313 .release = tracing_err_log_release,
7314 };
7315
7316 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7317 {
7318 struct trace_array *tr = inode->i_private;
7319 struct ftrace_buffer_info *info;
7320 int ret;
7321
7322 ret = tracing_check_open_get_tr(tr);
7323 if (ret)
7324 return ret;
7325
7326 info = kzalloc(sizeof(*info), GFP_KERNEL);
7327 if (!info) {
7328 trace_array_put(tr);
7329 return -ENOMEM;
7330 }
7331
7332 mutex_lock(&trace_types_lock);
7333
7334 info->iter.tr = tr;
7335 info->iter.cpu_file = tracing_get_cpu(inode);
7336 info->iter.trace = tr->current_trace;
7337 info->iter.trace_buffer = &tr->trace_buffer;
7338 info->spare = NULL;
7339 /* Force reading ring buffer for first read */
7340 info->read = (unsigned int)-1;
7341
7342 filp->private_data = info;
7343
7344 tr->current_trace->ref++;
7345
7346 mutex_unlock(&trace_types_lock);
7347
7348 ret = nonseekable_open(inode, filp);
7349 if (ret < 0)
7350 trace_array_put(tr);
7351
7352 return ret;
7353 }
7354
7355 static __poll_t
7356 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7357 {
7358 struct ftrace_buffer_info *info = filp->private_data;
7359 struct trace_iterator *iter = &info->iter;
7360
7361 return trace_poll(iter, filp, poll_table);
7362 }
7363
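/*
 * Read raw ring-buffer pages (this handler backs the per-CPU
 * trace_pipe_raw files). A "spare" page is borrowed from the ring
 * buffer, filled by ring_buffer_read_page(), and then copied to
 * userspace in chunks, blocking (unless O_NONBLOCK) while the buffer
 * is empty.
 */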
7364 static ssize_t
7365 tracing_buffers_read(struct file *filp, char __user *ubuf,
7366 size_t count, loff_t *ppos)
7367 {
7368 struct ftrace_buffer_info *info = filp->private_data;
7369 struct trace_iterator *iter = &info->iter;
7370 ssize_t ret = 0;
7371 ssize_t size;
7372
7373 if (!count)
7374 return 0;
7375
7376 #ifdef CONFIG_TRACER_MAX_TRACE
7377 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7378 return -EBUSY;
7379 #endif
7380
7381 if (!info->spare) {
7382 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
7383 iter->cpu_file);
7384 if (IS_ERR(info->spare)) {
7385 ret = PTR_ERR(info->spare);
7386 info->spare = NULL;
7387 } else {
7388 info->spare_cpu = iter->cpu_file;
7389 }
7390 }
7391 if (!info->spare)
7392 return ret;
7393
7394 /* Do we have previous read data to read? */
7395 if (info->read < PAGE_SIZE)
7396 goto read;
7397
7398 again:
7399 trace_access_lock(iter->cpu_file);
7400 ret = ring_buffer_read_page(iter->trace_buffer->buffer,
7401 &info->spare,
7402 count,
7403 iter->cpu_file, 0);
7404 trace_access_unlock(iter->cpu_file);
7405
7406 if (ret < 0) {
7407 if (trace_empty(iter)) {
7408 if ((filp->f_flags & O_NONBLOCK))
7409 return -EAGAIN;
7410
7411 ret = wait_on_pipe(iter, 0);
7412 if (ret)
7413 return ret;
7414
7415 goto again;
7416 }
7417 return 0;
7418 }
7419
7420 info->read = 0;
7421 read:
7422 size = PAGE_SIZE - info->read;
7423 if (size > count)
7424 size = count;
7425
7426 ret = copy_to_user(ubuf, info->spare + info->read, size);
7427 if (ret == size)
7428 return -EFAULT;
7429
7430 size -= ret;
7431
7432 *ppos += size;
7433 info->read += size;
7434
7435 return size;
7436 }
7437
7438 static int tracing_buffers_release(struct inode *inode, struct file *file)
7439 {
7440 struct ftrace_buffer_info *info = file->private_data;
7441 struct trace_iterator *iter = &info->iter;
7442
7443 mutex_lock(&trace_types_lock);
7444
7445 iter->tr->current_trace->ref--;
7446
7447 __trace_array_put(iter->tr);
7448
7449 if (info->spare)
7450 ring_buffer_free_read_page(iter->trace_buffer->buffer,
7451 info->spare_cpu, info->spare);
7452 kfree(info);
7453
7454 mutex_unlock(&trace_types_lock);
7455
7456 return 0;
7457 }
7458
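/*
 * A buffer_ref tracks one ring-buffer page that was handed to a pipe
 * by splice. The page goes back to the ring buffer only when the last
 * reference held by the pipe is dropped.
 */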
7459 struct buffer_ref {
7460 struct ring_buffer *buffer;
7461 void *page;
7462 int cpu;
7463 refcount_t refcount;
7464 };
7465
7466 static void buffer_ref_release(struct buffer_ref *ref)
7467 {
7468 if (!refcount_dec_and_test(&ref->refcount))
7469 return;
7470 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7471 kfree(ref);
7472 }
7473
7474 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7475 struct pipe_buffer *buf)
7476 {
7477 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7478
7479 buffer_ref_release(ref);
7480 buf->private = 0;
7481 }
7482
7483 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7484 struct pipe_buffer *buf)
7485 {
7486 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7487
7488 if (refcount_read(&ref->refcount) > INT_MAX/2)
7489 return false;
7490
7491 refcount_inc(&ref->refcount);
7492 return true;
7493 }
7494
7495 /* Pipe buffer operations for a buffer. */
7496 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7497 .confirm = generic_pipe_buf_confirm,
7498 .release = buffer_pipe_buf_release,
7499 .steal = generic_pipe_buf_nosteal,
7500 .get = buffer_pipe_buf_get,
7501 };
7502
7503 /*
7504 * Callback from splice_to_pipe(), used to release the remaining pages
7505 * at the end of the spd if we errored out while filling the pipe.
7506 */
7507 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7508 {
7509 struct buffer_ref *ref =
7510 (struct buffer_ref *)spd->partial[i].private;
7511
7512 buffer_ref_release(ref);
7513 spd->partial[i].private = 0;
7514 }
7515
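/*
 * splice() handler for the raw buffer files: moves complete
 * ring-buffer pages into the pipe without copying. Each page is
 * wrapped in a buffer_ref so it is only released back to the ring
 * buffer once the pipe consumer is done with it.
 */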
7516 static ssize_t
7517 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7518 struct pipe_inode_info *pipe, size_t len,
7519 unsigned int flags)
7520 {
7521 struct ftrace_buffer_info *info = file->private_data;
7522 struct trace_iterator *iter = &info->iter;
7523 struct partial_page partial_def[PIPE_DEF_BUFFERS];
7524 struct page *pages_def[PIPE_DEF_BUFFERS];
7525 struct splice_pipe_desc spd = {
7526 .pages = pages_def,
7527 .partial = partial_def,
7528 .nr_pages_max = PIPE_DEF_BUFFERS,
7529 .ops = &buffer_pipe_buf_ops,
7530 .spd_release = buffer_spd_release,
7531 };
7532 struct buffer_ref *ref;
7533 int entries, i;
7534 ssize_t ret = 0;
7535
7536 #ifdef CONFIG_TRACER_MAX_TRACE
7537 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7538 return -EBUSY;
7539 #endif
7540
7541 if (*ppos & (PAGE_SIZE - 1))
7542 return -EINVAL;
7543
7544 if (len & (PAGE_SIZE - 1)) {
7545 if (len < PAGE_SIZE)
7546 return -EINVAL;
7547 len &= PAGE_MASK;
7548 }
7549
7550 if (splice_grow_spd(pipe, &spd))
7551 return -ENOMEM;
7552
7553 again:
7554 trace_access_lock(iter->cpu_file);
7555 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7556
7557 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7558 struct page *page;
7559 int r;
7560
7561 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7562 if (!ref) {
7563 ret = -ENOMEM;
7564 break;
7565 }
7566
7567 refcount_set(&ref->refcount, 1);
7568 ref->buffer = iter->trace_buffer->buffer;
7569 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7570 if (IS_ERR(ref->page)) {
7571 ret = PTR_ERR(ref->page);
7572 ref->page = NULL;
7573 kfree(ref);
7574 break;
7575 }
7576 ref->cpu = iter->cpu_file;
7577
7578 r = ring_buffer_read_page(ref->buffer, &ref->page,
7579 len, iter->cpu_file, 1);
7580 if (r < 0) {
7581 ring_buffer_free_read_page(ref->buffer, ref->cpu,
7582 ref->page);
7583 kfree(ref);
7584 break;
7585 }
7586
7587 page = virt_to_page(ref->page);
7588
7589 spd.pages[i] = page;
7590 spd.partial[i].len = PAGE_SIZE;
7591 spd.partial[i].offset = 0;
7592 spd.partial[i].private = (unsigned long)ref;
7593 spd.nr_pages++;
7594 *ppos += PAGE_SIZE;
7595
7596 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7597 }
7598
7599 trace_access_unlock(iter->cpu_file);
7600 spd.nr_pages = i;
7601
7602 /* did we read anything? */
7603 if (!spd.nr_pages) {
7604 if (ret)
7605 goto out;
7606
7607 ret = -EAGAIN;
7608 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7609 goto out;
7610
7611 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7612 if (ret)
7613 goto out;
7614
7615 goto again;
7616 }
7617
7618 ret = splice_to_pipe(pipe, &spd);
7619 out:
7620 splice_shrink_spd(&spd);
7621
7622 return ret;
7623 }
7624
7625 static const struct file_operations tracing_buffers_fops = {
7626 .open = tracing_buffers_open,
7627 .read = tracing_buffers_read,
7628 .poll = tracing_buffers_poll,
7629 .release = tracing_buffers_release,
7630 .splice_read = tracing_buffers_splice_read,
7631 .llseek = no_llseek,
7632 };
7633
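/*
 * read() handler for the per-CPU "stats" files: prints the ring buffer
 * statistics (entries, overruns, bytes, timestamps, dropped and read
 * events) for one CPU as plain text.
 */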
7634 static ssize_t
7635 tracing_stats_read(struct file *filp, char __user *ubuf,
7636 size_t count, loff_t *ppos)
7637 {
7638 struct inode *inode = file_inode(filp);
7639 struct trace_array *tr = inode->i_private;
7640 struct trace_buffer *trace_buf = &tr->trace_buffer;
7641 int cpu = tracing_get_cpu(inode);
7642 struct trace_seq *s;
7643 unsigned long cnt;
7644 unsigned long long t;
7645 unsigned long usec_rem;
7646
7647 s = kmalloc(sizeof(*s), GFP_KERNEL);
7648 if (!s)
7649 return -ENOMEM;
7650
7651 trace_seq_init(s);
7652
7653 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7654 trace_seq_printf(s, "entries: %ld\n", cnt);
7655
7656 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7657 trace_seq_printf(s, "overrun: %ld\n", cnt);
7658
7659 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7660 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7661
7662 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7663 trace_seq_printf(s, "bytes: %ld\n", cnt);
7664
7665 if (trace_clocks[tr->clock_id].in_ns) {
7666 /* local or global for trace_clock */
7667 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7668 usec_rem = do_div(t, USEC_PER_SEC);
7669 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7670 t, usec_rem);
7671
7672 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7673 usec_rem = do_div(t, USEC_PER_SEC);
7674 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7675 } else {
7676 /* counter or tsc mode for trace_clock */
7677 trace_seq_printf(s, "oldest event ts: %llu\n",
7678 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7679
7680 trace_seq_printf(s, "now ts: %llu\n",
7681 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7682 }
7683
7684 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7685 trace_seq_printf(s, "dropped events: %ld\n", cnt);
7686
7687 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7688 trace_seq_printf(s, "read events: %ld\n", cnt);
7689
7690 count = simple_read_from_buffer(ubuf, count, ppos,
7691 s->buffer, trace_seq_used(s));
7692
7693 kfree(s);
7694
7695 return count;
7696 }
7697
7698 static const struct file_operations tracing_stats_fops = {
7699 .open = tracing_open_generic_tr,
7700 .read = tracing_stats_read,
7701 .llseek = generic_file_llseek,
7702 .release = tracing_release_generic_tr,
7703 };
7704
7705 #ifdef CONFIG_DYNAMIC_FTRACE
7706
7707 static ssize_t
7708 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7709 size_t cnt, loff_t *ppos)
7710 {
7711 ssize_t ret;
7712 char *buf;
7713 int r;
7714
7715 /* 256 should be plenty to hold the amount needed */
7716 buf = kmalloc(256, GFP_KERNEL);
7717 if (!buf)
7718 return -ENOMEM;
7719
7720 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7721 ftrace_update_tot_cnt,
7722 ftrace_number_of_pages,
7723 ftrace_number_of_groups);
7724
7725 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7726 kfree(buf);
7727 return ret;
7728 }
7729
7730 static const struct file_operations tracing_dyn_info_fops = {
7731 .open = tracing_open_generic,
7732 .read = tracing_read_dyn_info,
7733 .llseek = generic_file_llseek,
7734 };
7735 #endif /* CONFIG_DYNAMIC_FTRACE */
7736
7737 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7738 static void
7739 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7740 struct trace_array *tr, struct ftrace_probe_ops *ops,
7741 void *data)
7742 {
7743 tracing_snapshot_instance(tr);
7744 }
7745
7746 static void
7747 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7748 struct trace_array *tr, struct ftrace_probe_ops *ops,
7749 void *data)
7750 {
7751 struct ftrace_func_mapper *mapper = data;
7752 long *count = NULL;
7753
7754 if (mapper)
7755 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7756
7757 if (count) {
7758
7759 if (*count <= 0)
7760 return;
7761
7762 (*count)--;
7763 }
7764
7765 tracing_snapshot_instance(tr);
7766 }
7767
7768 static int
7769 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7770 struct ftrace_probe_ops *ops, void *data)
7771 {
7772 struct ftrace_func_mapper *mapper = data;
7773 long *count = NULL;
7774
7775 seq_printf(m, "%ps:", (void *)ip);
7776
7777 seq_puts(m, "snapshot");
7778
7779 if (mapper)
7780 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7781
7782 if (count)
7783 seq_printf(m, ":count=%ld\n", *count);
7784 else
7785 seq_puts(m, ":unlimited\n");
7786
7787 return 0;
7788 }
7789
7790 static int
7791 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7792 unsigned long ip, void *init_data, void **data)
7793 {
7794 struct ftrace_func_mapper *mapper = *data;
7795
7796 if (!mapper) {
7797 mapper = allocate_ftrace_func_mapper();
7798 if (!mapper)
7799 return -ENOMEM;
7800 *data = mapper;
7801 }
7802
7803 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7804 }
7805
7806 static void
7807 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7808 unsigned long ip, void *data)
7809 {
7810 struct ftrace_func_mapper *mapper = data;
7811
7812 if (!ip) {
7813 if (!mapper)
7814 return;
7815 free_ftrace_func_mapper(mapper, NULL);
7816 return;
7817 }
7818
7819 ftrace_func_mapper_remove_ip(mapper, ip);
7820 }
7821
7822 static struct ftrace_probe_ops snapshot_probe_ops = {
7823 .func = ftrace_snapshot,
7824 .print = ftrace_snapshot_print,
7825 };
7826
7827 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7828 .func = ftrace_count_snapshot,
7829 .print = ftrace_snapshot_print,
7830 .init = ftrace_snapshot_init,
7831 .free = ftrace_snapshot_free,
7832 };
7833
7834 static int
7835 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7836 char *glob, char *cmd, char *param, int enable)
7837 {
7838 struct ftrace_probe_ops *ops;
7839 void *count = (void *)-1;
7840 char *number;
7841 int ret;
7842
7843 if (!tr)
7844 return -ENODEV;
7845
7846 /* hash funcs only work with set_ftrace_filter */
7847 if (!enable)
7848 return -EINVAL;
7849
7850 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7851
7852 if (glob[0] == '!')
7853 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7854
7855 if (!param)
7856 goto out_reg;
7857
7858 number = strsep(&param, ":");
7859
7860 if (!strlen(number))
7861 goto out_reg;
7862
7863 /*
7864 * We use the callback data field (which is a pointer)
7865 * as our counter.
7866 */
7867 ret = kstrtoul(number, 0, (unsigned long *)&count);
7868 if (ret)
7869 return ret;
7870
7871 out_reg:
7872 ret = tracing_alloc_snapshot_instance(tr);
7873 if (ret < 0)
7874 goto out;
7875
7876 ret = register_ftrace_function_probe(glob, tr, ops, count);
7877
7878 out:
7879 return ret < 0 ? ret : 0;
7880 }
7881
7882 static struct ftrace_func_command ftrace_snapshot_cmd = {
7883 .name = "snapshot",
7884 .func = ftrace_trace_snapshot_callback,
7885 };
7886
7887 static __init int register_snapshot_cmd(void)
7888 {
7889 return register_ftrace_command(&ftrace_snapshot_cmd);
7890 }
7891 #else
7892 static inline __init int register_snapshot_cmd(void) { return 0; }
7893 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7894
7895 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7896 {
7897 if (WARN_ON(!tr->dir))
7898 return ERR_PTR(-ENODEV);
7899
7900 /* Top directory uses NULL as the parent */
7901 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7902 return NULL;
7903
7904 /* All sub buffers have a descriptor */
7905 return tr->dir;
7906 }
7907
7908 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7909 {
7910 struct dentry *d_tracer;
7911
7912 if (tr->percpu_dir)
7913 return tr->percpu_dir;
7914
7915 d_tracer = tracing_get_dentry(tr);
7916 if (IS_ERR(d_tracer))
7917 return NULL;
7918
7919 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7920
7921 WARN_ONCE(!tr->percpu_dir,
7922 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7923
7924 return tr->percpu_dir;
7925 }
7926
7927 static struct dentry *
7928 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7929 void *data, long cpu, const struct file_operations *fops)
7930 {
7931 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7932
7933 if (ret) /* See tracing_get_cpu() */
7934 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7935 return ret;
7936 }
7937
7938 static void
7939 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7940 {
7941 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7942 struct dentry *d_cpu;
7943 char cpu_dir[30]; /* 30 characters should be more than enough */
7944
7945 if (!d_percpu)
7946 return;
7947
7948 snprintf(cpu_dir, 30, "cpu%ld", cpu);
7949 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7950 if (!d_cpu) {
7951 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7952 return;
7953 }
7954
7955 /* per cpu trace_pipe */
7956 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7957 tr, cpu, &tracing_pipe_fops);
7958
7959 /* per cpu trace */
7960 trace_create_cpu_file("trace", 0644, d_cpu,
7961 tr, cpu, &tracing_fops);
7962
7963 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7964 tr, cpu, &tracing_buffers_fops);
7965
7966 trace_create_cpu_file("stats", 0444, d_cpu,
7967 tr, cpu, &tracing_stats_fops);
7968
7969 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7970 tr, cpu, &tracing_entries_fops);
7971
7972 #ifdef CONFIG_TRACER_SNAPSHOT
7973 trace_create_cpu_file("snapshot", 0644, d_cpu,
7974 tr, cpu, &snapshot_fops);
7975
7976 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7977 tr, cpu, &snapshot_raw_fops);
7978 #endif
7979 }
7980
7981 #ifdef CONFIG_FTRACE_SELFTEST
7982 /* Let selftest have access to static functions in this file */
7983 #include "trace_selftest.c"
7984 #endif
7985
7986 static ssize_t
7987 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7988 loff_t *ppos)
7989 {
7990 struct trace_option_dentry *topt = filp->private_data;
7991 char *buf;
7992
7993 if (topt->flags->val & topt->opt->bit)
7994 buf = "1\n";
7995 else
7996 buf = "0\n";
7997
7998 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7999 }
8000
8001 static ssize_t
8002 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8003 loff_t *ppos)
8004 {
8005 struct trace_option_dentry *topt = filp->private_data;
8006 unsigned long val;
8007 int ret;
8008
8009 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8010 if (ret)
8011 return ret;
8012
8013 if (val != 0 && val != 1)
8014 return -EINVAL;
8015
8016 if (!!(topt->flags->val & topt->opt->bit) != val) {
8017 mutex_lock(&trace_types_lock);
8018 ret = __set_tracer_option(topt->tr, topt->flags,
8019 topt->opt, !val);
8020 mutex_unlock(&trace_types_lock);
8021 if (ret)
8022 return ret;
8023 }
8024
8025 *ppos += cnt;
8026
8027 return cnt;
8028 }
8029
8030 static int tracing_open_options(struct inode *inode, struct file *filp)
8031 {
8032 struct trace_option_dentry *topt = inode->i_private;
8033 int ret;
8034
8035 ret = tracing_check_open_get_tr(topt->tr);
8036 if (ret)
8037 return ret;
8038
8039 filp->private_data = inode->i_private;
8040 return 0;
8041 }
8042
8043 static int tracing_release_options(struct inode *inode, struct file *file)
8044 {
8045 struct trace_option_dentry *topt = file->private_data;
8046
8047 trace_array_put(topt->tr);
8048 return 0;
8049 }
8050
8051 static const struct file_operations trace_options_fops = {
8052 .open = tracing_open_options,
8053 .read = trace_options_read,
8054 .write = trace_options_write,
8055 .llseek = generic_file_llseek,
8056 .release = tracing_release_options,
8057 };
8058
8059 /*
8060 * In order to pass in both the trace_array descriptor as well as the index
8061 * to the flag that the trace option file represents, the trace_array
8062 * has a character array of trace_flags_index[], which holds the index
8063 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8064 * The address of this character array is passed to the flag option file
8065 * read/write callbacks.
8066 *
8067 * In order to extract both the index and the trace_array descriptor,
8068 * get_tr_index() uses the following algorithm.
8069 *
8070 * idx = *ptr;
8071 *
8072 * As the pointer itself contains the address of the index (remember
8073 * index[1] == 1).
8074 *
8075 * Then to get the trace_array descriptor, by subtracting that index
8076 * from the ptr, we get to the start of the index itself.
8077 *
8078 * ptr - idx == &index[0]
8079 *
8080 * Then a simple container_of() from that pointer gets us to the
8081 * trace_array descriptor.
8082 */
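/*
 * Example: if data == &tr->trace_flags_index[3], then *data == 3,
 * data - 3 == &tr->trace_flags_index[0], and container_of() on that
 * address gives us tr itself.
 */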
8083 static void get_tr_index(void *data, struct trace_array **ptr,
8084 unsigned int *pindex)
8085 {
8086 *pindex = *(unsigned char *)data;
8087
8088 *ptr = container_of(data - *pindex, struct trace_array,
8089 trace_flags_index);
8090 }
8091
8092 static ssize_t
8093 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8094 loff_t *ppos)
8095 {
8096 void *tr_index = filp->private_data;
8097 struct trace_array *tr;
8098 unsigned int index;
8099 char *buf;
8100
8101 get_tr_index(tr_index, &tr, &index);
8102
8103 if (tr->trace_flags & (1 << index))
8104 buf = "1\n";
8105 else
8106 buf = "0\n";
8107
8108 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8109 }
8110
8111 static ssize_t
8112 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8113 loff_t *ppos)
8114 {
8115 void *tr_index = filp->private_data;
8116 struct trace_array *tr;
8117 unsigned int index;
8118 unsigned long val;
8119 int ret;
8120
8121 get_tr_index(tr_index, &tr, &index);
8122
8123 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8124 if (ret)
8125 return ret;
8126
8127 if (val != 0 && val != 1)
8128 return -EINVAL;
8129
8130 mutex_lock(&event_mutex);
8131 mutex_lock(&trace_types_lock);
8132 ret = set_tracer_flag(tr, 1 << index, val);
8133 mutex_unlock(&trace_types_lock);
8134 mutex_unlock(&event_mutex);
8135
8136 if (ret < 0)
8137 return ret;
8138
8139 *ppos += cnt;
8140
8141 return cnt;
8142 }
8143
8144 static const struct file_operations trace_options_core_fops = {
8145 .open = tracing_open_generic,
8146 .read = trace_options_core_read,
8147 .write = trace_options_core_write,
8148 .llseek = generic_file_llseek,
8149 };
8150
8151 struct dentry *trace_create_file(const char *name,
8152 umode_t mode,
8153 struct dentry *parent,
8154 void *data,
8155 const struct file_operations *fops)
8156 {
8157 struct dentry *ret;
8158
8159 ret = tracefs_create_file(name, mode, parent, data, fops);
8160 if (!ret)
8161 pr_warn("Could not create tracefs '%s' entry\n", name);
8162
8163 return ret;
8164 }
8165
8166
8167 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8168 {
8169 struct dentry *d_tracer;
8170
8171 if (tr->options)
8172 return tr->options;
8173
8174 d_tracer = tracing_get_dentry(tr);
8175 if (IS_ERR(d_tracer))
8176 return NULL;
8177
8178 tr->options = tracefs_create_dir("options", d_tracer);
8179 if (!tr->options) {
8180 pr_warn("Could not create tracefs directory 'options'\n");
8181 return NULL;
8182 }
8183
8184 return tr->options;
8185 }
8186
8187 static void
8188 create_trace_option_file(struct trace_array *tr,
8189 struct trace_option_dentry *topt,
8190 struct tracer_flags *flags,
8191 struct tracer_opt *opt)
8192 {
8193 struct dentry *t_options;
8194
8195 t_options = trace_options_init_dentry(tr);
8196 if (!t_options)
8197 return;
8198
8199 topt->flags = flags;
8200 topt->opt = opt;
8201 topt->tr = tr;
8202
8203 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8204 &trace_options_fops);
8205
8206 }
8207
8208 static void
8209 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8210 {
8211 struct trace_option_dentry *topts;
8212 struct trace_options *tr_topts;
8213 struct tracer_flags *flags;
8214 struct tracer_opt *opts;
8215 int cnt;
8216 int i;
8217
8218 if (!tracer)
8219 return;
8220
8221 flags = tracer->flags;
8222
8223 if (!flags || !flags->opts)
8224 return;
8225
8226 /*
8227 * If this is an instance, only create flags for tracers
8228 * the instance may have.
8229 */
8230 if (!trace_ok_for_array(tracer, tr))
8231 return;
8232
8233 for (i = 0; i < tr->nr_topts; i++) {
8234 /* Make sure there's no duplicate flags. */
8235 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8236 return;
8237 }
8238
8239 opts = flags->opts;
8240
8241 for (cnt = 0; opts[cnt].name; cnt++)
8242 ;
8243
8244 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8245 if (!topts)
8246 return;
8247
8248 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8249 GFP_KERNEL);
8250 if (!tr_topts) {
8251 kfree(topts);
8252 return;
8253 }
8254
8255 tr->topts = tr_topts;
8256 tr->topts[tr->nr_topts].tracer = tracer;
8257 tr->topts[tr->nr_topts].topts = topts;
8258 tr->nr_topts++;
8259
8260 for (cnt = 0; opts[cnt].name; cnt++) {
8261 create_trace_option_file(tr, &topts[cnt], flags,
8262 &opts[cnt]);
8263 WARN_ONCE(topts[cnt].entry == NULL,
8264 "Failed to create trace option: %s",
8265 opts[cnt].name);
8266 }
8267 }
8268
8269 static struct dentry *
8270 create_trace_option_core_file(struct trace_array *tr,
8271 const char *option, long index)
8272 {
8273 struct dentry *t_options;
8274
8275 t_options = trace_options_init_dentry(tr);
8276 if (!t_options)
8277 return NULL;
8278
8279 return trace_create_file(option, 0644, t_options,
8280 (void *)&tr->trace_flags_index[index],
8281 &trace_options_core_fops);
8282 }
8283
8284 static void create_trace_options_dir(struct trace_array *tr)
8285 {
8286 struct dentry *t_options;
8287 bool top_level = tr == &global_trace;
8288 int i;
8289
8290 t_options = trace_options_init_dentry(tr);
8291 if (!t_options)
8292 return;
8293
8294 for (i = 0; trace_options[i]; i++) {
8295 if (top_level ||
8296 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8297 create_trace_option_core_file(tr, trace_options[i], i);
8298 }
8299 }
8300
8301 static ssize_t
8302 rb_simple_read(struct file *filp, char __user *ubuf,
8303 size_t cnt, loff_t *ppos)
8304 {
8305 struct trace_array *tr = filp->private_data;
8306 char buf[64];
8307 int r;
8308
8309 r = tracer_tracing_is_on(tr);
8310 r = sprintf(buf, "%d\n", r);
8311
8312 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8313 }
8314
8315 static ssize_t
8316 rb_simple_write(struct file *filp, const char __user *ubuf,
8317 size_t cnt, loff_t *ppos)
8318 {
8319 struct trace_array *tr = filp->private_data;
8320 struct ring_buffer *buffer = tr->trace_buffer.buffer;
8321 unsigned long val;
8322 int ret;
8323
8324 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8325 if (ret)
8326 return ret;
8327
8328 if (buffer) {
8329 mutex_lock(&trace_types_lock);
8330 if (!!val == tracer_tracing_is_on(tr)) {
8331 val = 0; /* do nothing */
8332 } else if (val) {
8333 tracer_tracing_on(tr);
8334 if (tr->current_trace->start)
8335 tr->current_trace->start(tr);
8336 } else {
8337 tracer_tracing_off(tr);
8338 if (tr->current_trace->stop)
8339 tr->current_trace->stop(tr);
8340 }
8341 mutex_unlock(&trace_types_lock);
8342 }
8343
8344 (*ppos)++;
8345
8346 return cnt;
8347 }
8348
8349 static const struct file_operations rb_simple_fops = {
8350 .open = tracing_open_generic_tr,
8351 .read = rb_simple_read,
8352 .write = rb_simple_write,
8353 .release = tracing_release_generic_tr,
8354 .llseek = default_llseek,
8355 };
8356
8357 static ssize_t
8358 buffer_percent_read(struct file *filp, char __user *ubuf,
8359 size_t cnt, loff_t *ppos)
8360 {
8361 struct trace_array *tr = filp->private_data;
8362 char buf[64];
8363 int r;
8364
8365 r = tr->buffer_percent;
8366 r = sprintf(buf, "%d\n", r);
8367
8368 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8369 }
8370
8371 static ssize_t
8372 buffer_percent_write(struct file *filp, const char __user *ubuf,
8373 size_t cnt, loff_t *ppos)
8374 {
8375 struct trace_array *tr = filp->private_data;
8376 unsigned long val;
8377 int ret;
8378
8379 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8380 if (ret)
8381 return ret;
8382
8383 if (val > 100)
8384 return -EINVAL;
8385
8386 tr->buffer_percent = val;
8387
8388 (*ppos)++;
8389
8390 return cnt;
8391 }
8392
8393 static const struct file_operations buffer_percent_fops = {
8394 .open = tracing_open_generic_tr,
8395 .read = buffer_percent_read,
8396 .write = buffer_percent_write,
8397 .release = tracing_release_generic_tr,
8398 .llseek = default_llseek,
8399 };
8400
8401 static struct dentry *trace_instance_dir;
8402
8403 static void
8404 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8405
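/*
 * Allocate one trace buffer (the ring buffer plus its per-CPU data)
 * for a trace array. On failure, anything allocated here is freed
 * again before returning.
 */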
8406 static int
8407 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
8408 {
8409 enum ring_buffer_flags rb_flags;
8410
8411 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8412
8413 buf->tr = tr;
8414
8415 buf->buffer = ring_buffer_alloc(size, rb_flags);
8416 if (!buf->buffer)
8417 return -ENOMEM;
8418
8419 buf->data = alloc_percpu(struct trace_array_cpu);
8420 if (!buf->data) {
8421 ring_buffer_free(buf->buffer);
8422 buf->buffer = NULL;
8423 return -ENOMEM;
8424 }
8425
8426 /* Allocate the first page for all buffers */
8427 set_buffer_entries(&tr->trace_buffer,
8428 ring_buffer_size(tr->trace_buffer.buffer, 0));
8429
8430 return 0;
8431 }
8432
8433 static int allocate_trace_buffers(struct trace_array *tr, int size)
8434 {
8435 int ret;
8436
8437 ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
8438 if (ret)
8439 return ret;
8440
8441 #ifdef CONFIG_TRACER_MAX_TRACE
8442 ret = allocate_trace_buffer(tr, &tr->max_buffer,
8443 allocate_snapshot ? size : 1);
8444 if (WARN_ON(ret)) {
8445 ring_buffer_free(tr->trace_buffer.buffer);
8446 tr->trace_buffer.buffer = NULL;
8447 free_percpu(tr->trace_buffer.data);
8448 tr->trace_buffer.data = NULL;
8449 return -ENOMEM;
8450 }
8451 tr->allocated_snapshot = allocate_snapshot;
8452
8453 /*
8454 * Only the top level trace array gets its snapshot allocated
8455 * from the kernel command line.
8456 */
8457 allocate_snapshot = false;
8458 #endif
8459
8460 /*
8461 * Because of some magic with the way alloc_percpu() works on
8462 * x86_64, we need to synchronize the pgd of all the tables,
8463 * otherwise trace events that fire in x86_64 page fault
8464 * handlers cannot cope with the chance that the
8465 * alloc_percpu()'d memory itself gets touched from within the page
8466 * fault trace event. We also need to audit all other alloc_percpu() and vmalloc()
8467 * calls in tracing, because something might get triggered within a
8468 * page fault trace event!
8469 */
8470 vmalloc_sync_mappings();
8471
8472 return 0;
8473 }
8474
8475 static void free_trace_buffer(struct trace_buffer *buf)
8476 {
8477 if (buf->buffer) {
8478 ring_buffer_free(buf->buffer);
8479 buf->buffer = NULL;
8480 free_percpu(buf->data);
8481 buf->data = NULL;
8482 }
8483 }
8484
8485 static void free_trace_buffers(struct trace_array *tr)
8486 {
8487 if (!tr)
8488 return;
8489
8490 free_trace_buffer(&tr->trace_buffer);
8491
8492 #ifdef CONFIG_TRACER_MAX_TRACE
8493 free_trace_buffer(&tr->max_buffer);
8494 #endif
8495 }
8496
8497 static void init_trace_flags_index(struct trace_array *tr)
8498 {
8499 int i;
8500
8501 /* Used by the trace options files */
8502 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8503 tr->trace_flags_index[i] = i;
8504 }
8505
8506 static void __update_tracer_options(struct trace_array *tr)
8507 {
8508 struct tracer *t;
8509
8510 for (t = trace_types; t; t = t->next)
8511 add_tracer_options(tr, t);
8512 }
8513
8514 static void update_tracer_options(struct trace_array *tr)
8515 {
8516 mutex_lock(&trace_types_lock);
8517 tracer_options_updated = true;
8518 __update_tracer_options(tr);
8519 mutex_unlock(&trace_types_lock);
8520 }
8521
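/*
 * Create a new trace instance: allocate the trace_array and its
 * buffers, create its tracefs directory and control files, and add it
 * to ftrace_trace_arrays. Returns the new trace_array, or an ERR_PTR()
 * if the name is already in use or an allocation fails.
 */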
8522 struct trace_array *trace_array_create(const char *name)
8523 {
8524 struct trace_array *tr;
8525 int ret;
8526
8527 mutex_lock(&event_mutex);
8528 mutex_lock(&trace_types_lock);
8529
8530 ret = -EEXIST;
8531 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8532 if (tr->name && strcmp(tr->name, name) == 0)
8533 goto out_unlock;
8534 }
8535
8536 ret = -ENOMEM;
8537 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8538 if (!tr)
8539 goto out_unlock;
8540
8541 tr->name = kstrdup(name, GFP_KERNEL);
8542 if (!tr->name)
8543 goto out_free_tr;
8544
8545 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8546 goto out_free_tr;
8547
8548 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8549
8550 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8551
8552 raw_spin_lock_init(&tr->start_lock);
8553
8554 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8555
8556 tr->current_trace = &nop_trace;
8557
8558 INIT_LIST_HEAD(&tr->systems);
8559 INIT_LIST_HEAD(&tr->events);
8560 INIT_LIST_HEAD(&tr->hist_vars);
8561 INIT_LIST_HEAD(&tr->err_log);
8562
8563 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8564 goto out_free_tr;
8565
8566 tr->dir = tracefs_create_dir(name, trace_instance_dir);
8567 if (!tr->dir)
8568 goto out_free_tr;
8569
8570 ret = event_trace_add_tracer(tr->dir, tr);
8571 if (ret) {
8572 tracefs_remove_recursive(tr->dir);
8573 goto out_free_tr;
8574 }
8575
8576 ftrace_init_trace_array(tr);
8577
8578 init_tracer_tracefs(tr, tr->dir);
8579 init_trace_flags_index(tr);
8580 __update_tracer_options(tr);
8581
8582 list_add(&tr->list, &ftrace_trace_arrays);
8583
8584 mutex_unlock(&trace_types_lock);
8585 mutex_unlock(&event_mutex);
8586
8587 return tr;
8588
8589 out_free_tr:
8590 free_trace_buffers(tr);
8591 free_cpumask_var(tr->tracing_cpumask);
8592 kfree(tr->name);
8593 kfree(tr);
8594
8595 out_unlock:
8596 mutex_unlock(&trace_types_lock);
8597 mutex_unlock(&event_mutex);
8598
8599 return ERR_PTR(ret);
8600 }
8601 EXPORT_SYMBOL_GPL(trace_array_create);
8602
8603 static int instance_mkdir(const char *name)
8604 {
8605 return PTR_ERR_OR_ZERO(trace_array_create(name));
8606 }
8607
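/*
 * Tear down a trace instance. The caller must hold event_mutex and
 * trace_types_lock. Fails with -EBUSY if the instance or its current
 * tracer still has references.
 */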
8608 static int __remove_instance(struct trace_array *tr)
8609 {
8610 int i;
8611
8612 if (tr->ref || (tr->current_trace && tr->current_trace->ref))
8613 return -EBUSY;
8614
8615 list_del(&tr->list);
8616
8617 /* Disable all the flags that were enabled coming in */
8618 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8619 if ((1 << i) & ZEROED_TRACE_FLAGS)
8620 set_tracer_flag(tr, 1 << i, 0);
8621 }
8622
8623 tracing_set_nop(tr);
8624 clear_ftrace_function_probes(tr);
8625 event_trace_del_tracer(tr);
8626 ftrace_clear_pids(tr);
8627 ftrace_destroy_function_files(tr);
8628 tracefs_remove_recursive(tr->dir);
8629 free_trace_buffers(tr);
8630 clear_tracing_err_log(tr);
8631
8632 for (i = 0; i < tr->nr_topts; i++) {
8633 kfree(tr->topts[i].topts);
8634 }
8635 kfree(tr->topts);
8636
8637 free_cpumask_var(tr->tracing_cpumask);
8638 kfree(tr->name);
8639 kfree(tr);
8640 tr = NULL;
8641
8642 return 0;
8643 }
8644
8645 int trace_array_destroy(struct trace_array *this_tr)
8646 {
8647 struct trace_array *tr;
8648 int ret;
8649
8650 if (!this_tr)
8651 return -EINVAL;
8652
8653 mutex_lock(&event_mutex);
8654 mutex_lock(&trace_types_lock);
8655
8656 ret = -ENODEV;
8657
8658 /* Make sure the trace array exists before destroying it. */
8659 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8660 if (tr == this_tr) {
8661 ret = __remove_instance(tr);
8662 break;
8663 }
8664 }
8665
8666 mutex_unlock(&trace_types_lock);
8667 mutex_unlock(&event_mutex);
8668
8669 return ret;
8670 }
8671 EXPORT_SYMBOL_GPL(trace_array_destroy);
8672
8673 static int instance_rmdir(const char *name)
8674 {
8675 struct trace_array *tr;
8676 int ret;
8677
8678 mutex_lock(&event_mutex);
8679 mutex_lock(&trace_types_lock);
8680
8681 ret = -ENODEV;
8682 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8683 if (tr->name && strcmp(tr->name, name) == 0) {
8684 ret = __remove_instance(tr);
8685 break;
8686 }
8687 }
8688
8689 mutex_unlock(&trace_types_lock);
8690 mutex_unlock(&event_mutex);
8691
8692 return ret;
8693 }
8694
8695 static __init void create_trace_instances(struct dentry *d_tracer)
8696 {
8697 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8698 instance_mkdir,
8699 instance_rmdir);
8700 if (WARN_ON(!trace_instance_dir))
8701 return;
8702 }
8703
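/*
 * Create the standard set of tracefs control files (trace, trace_pipe,
 * tracing_on, buffer_size_kb, options/, per_cpu/, etc.) for a trace
 * array, be it the top level directory or an instance directory.
 */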
8704 static void
8705 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8706 {
8707 struct trace_event_file *file;
8708 int cpu;
8709
8710 trace_create_file("available_tracers", 0444, d_tracer,
8711 tr, &show_traces_fops);
8712
8713 trace_create_file("current_tracer", 0644, d_tracer,
8714 tr, &set_tracer_fops);
8715
8716 trace_create_file("tracing_cpumask", 0644, d_tracer,
8717 tr, &tracing_cpumask_fops);
8718
8719 trace_create_file("trace_options", 0644, d_tracer,
8720 tr, &tracing_iter_fops);
8721
8722 trace_create_file("trace", 0644, d_tracer,
8723 tr, &tracing_fops);
8724
8725 trace_create_file("trace_pipe", 0444, d_tracer,
8726 tr, &tracing_pipe_fops);
8727
8728 trace_create_file("buffer_size_kb", 0644, d_tracer,
8729 tr, &tracing_entries_fops);
8730
8731 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8732 tr, &tracing_total_entries_fops);
8733
8734 trace_create_file("free_buffer", 0200, d_tracer,
8735 tr, &tracing_free_buffer_fops);
8736
8737 trace_create_file("trace_marker", 0220, d_tracer,
8738 tr, &tracing_mark_fops);
8739
8740 file = __find_event_file(tr, "ftrace", "print");
8741 if (file && file->dir)
8742 trace_create_file("trigger", 0644, file->dir, file,
8743 &event_trigger_fops);
8744 tr->trace_marker_file = file;
8745
8746 trace_create_file("trace_marker_raw", 0220, d_tracer,
8747 tr, &tracing_mark_raw_fops);
8748
8749 trace_create_file("trace_clock", 0644, d_tracer, tr,
8750 &trace_clock_fops);
8751
8752 trace_create_file("tracing_on", 0644, d_tracer,
8753 tr, &rb_simple_fops);
8754
8755 trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8756 &trace_time_stamp_mode_fops);
8757
8758 tr->buffer_percent = 50;
8759
8760 trace_create_file("buffer_percent", 0444, d_tracer,
8761 tr, &buffer_percent_fops);
8762
8763 create_trace_options_dir(tr);
8764
8765 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8766 trace_create_file("tracing_max_latency", 0644, d_tracer,
8767 &tr->max_latency, &tracing_max_lat_fops);
8768 #endif
8769
8770 if (ftrace_create_function_files(tr, d_tracer))
8771 WARN(1, "Could not allocate function filter files");
8772
8773 #ifdef CONFIG_TRACER_SNAPSHOT
8774 trace_create_file("snapshot", 0644, d_tracer,
8775 tr, &snapshot_fops);
8776 #endif
8777
8778 trace_create_file("error_log", 0644, d_tracer,
8779 tr, &tracing_err_log_fops);
8780
8781 for_each_tracing_cpu(cpu)
8782 tracing_init_tracefs_percpu(tr, cpu);
8783
8784 ftrace_init_tracefs(tr, d_tracer);
8785 }
8786
8787 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8788 {
8789 struct vfsmount *mnt;
8790 struct file_system_type *type;
8791
8792 /*
8793 * To maintain backward compatibility for tools that mount
8794 * debugfs to get to the tracing facility, tracefs is automatically
8795 * mounted to the debugfs/tracing directory.
8796 */
8797 type = get_fs_type("tracefs");
8798 if (!type)
8799 return NULL;
8800 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8801 put_filesystem(type);
8802 if (IS_ERR(mnt))
8803 return NULL;
8804 mntget(mnt);
8805
8806 return mnt;
8807 }
8808
8809 /**
8810 * tracing_init_dentry - initialize top level trace array
8811 *
8812 * This is called when creating files or directories in the tracing
8813 * directory. It is called via fs_initcall() by any of the boot up code
8814 * and expects to return the dentry of the top level tracing directory.
8815 */
8816 struct dentry *tracing_init_dentry(void)
8817 {
8818 struct trace_array *tr = &global_trace;
8819
8820 if (security_locked_down(LOCKDOWN_TRACEFS)) {
8821 pr_warning("Tracing disabled due to lockdown\n");
8822 return ERR_PTR(-EPERM);
8823 }
8824
8825 /* The top level trace array uses NULL as parent */
8826 if (tr->dir)
8827 return NULL;
8828
8829 if (WARN_ON(!tracefs_initialized()) ||
8830 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8831 WARN_ON(!debugfs_initialized())))
8832 return ERR_PTR(-ENODEV);
8833
8834 /*
8835 * As there may still be users that expect the tracing
8836 * files to exist in debugfs/tracing, we must automount
8837 * the tracefs file system there, so older tools still
8838 * work with the newer kernel.
8839 */
8840 tr->dir = debugfs_create_automount("tracing", NULL,
8841 trace_automount, NULL);
8842
8843 return NULL;
8844 }
8845
8846 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8847 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8848
8849 static void __init trace_eval_init(void)
8850 {
8851 int len;
8852
8853 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8854 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8855 }
8856
8857 #ifdef CONFIG_MODULES
8858 static void trace_module_add_evals(struct module *mod)
8859 {
8860 if (!mod->num_trace_evals)
8861 return;
8862
8863 /*
8864 * Modules with bad taint do not have events created, do
8865 * not bother with enums either.
8866 */
8867 if (trace_module_has_bad_taint(mod))
8868 return;
8869
8870 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8871 }
8872
8873 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8874 static void trace_module_remove_evals(struct module *mod)
8875 {
8876 union trace_eval_map_item *map;
8877 union trace_eval_map_item **last = &trace_eval_maps;
8878
8879 if (!mod->num_trace_evals)
8880 return;
8881
8882 mutex_lock(&trace_eval_mutex);
8883
8884 map = trace_eval_maps;
8885
8886 while (map) {
8887 if (map->head.mod == mod)
8888 break;
8889 map = trace_eval_jmp_to_tail(map);
8890 last = &map->tail.next;
8891 map = map->tail.next;
8892 }
8893 if (!map)
8894 goto out;
8895
8896 *last = trace_eval_jmp_to_tail(map)->tail.next;
8897 kfree(map);
8898 out:
8899 mutex_unlock(&trace_eval_mutex);
8900 }
8901 #else
8902 static inline void trace_module_remove_evals(struct module *mod) { }
8903 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8904
8905 static int trace_module_notify(struct notifier_block *self,
8906 unsigned long val, void *data)
8907 {
8908 struct module *mod = data;
8909
8910 switch (val) {
8911 case MODULE_STATE_COMING:
8912 trace_module_add_evals(mod);
8913 break;
8914 case MODULE_STATE_GOING:
8915 trace_module_remove_evals(mod);
8916 break;
8917 }
8918
8919 return 0;
8920 }
8921
8922 static struct notifier_block trace_module_nb = {
8923 .notifier_call = trace_module_notify,
8924 .priority = 0,
8925 };
8926 #endif /* CONFIG_MODULES */
8927
8928 static __init int tracer_init_tracefs(void)
8929 {
8930 struct dentry *d_tracer;
8931
8932 trace_access_lock_init();
8933
8934 d_tracer = tracing_init_dentry();
8935 if (IS_ERR(d_tracer))
8936 return 0;
8937
8938 event_trace_init();
8939
8940 init_tracer_tracefs(&global_trace, d_tracer);
8941 ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8942
8943 trace_create_file("tracing_thresh", 0644, d_tracer,
8944 &global_trace, &tracing_thresh_fops);
8945
8946 trace_create_file("README", 0444, d_tracer,
8947 NULL, &tracing_readme_fops);
8948
8949 trace_create_file("saved_cmdlines", 0444, d_tracer,
8950 NULL, &tracing_saved_cmdlines_fops);
8951
8952 trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8953 NULL, &tracing_saved_cmdlines_size_fops);
8954
8955 trace_create_file("saved_tgids", 0444, d_tracer,
8956 NULL, &tracing_saved_tgids_fops);
8957
8958 trace_eval_init();
8959
8960 trace_create_eval_file(d_tracer);
8961
8962 #ifdef CONFIG_MODULES
8963 register_module_notifier(&trace_module_nb);
8964 #endif
8965
8966 #ifdef CONFIG_DYNAMIC_FTRACE
8967 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8968 NULL, &tracing_dyn_info_fops);
8969 #endif
8970
8971 create_trace_instances(d_tracer);
8972
8973 update_tracer_options(&global_trace);
8974
8975 return 0;
8976 }
8977
8978 static int trace_panic_handler(struct notifier_block *this,
8979 unsigned long event, void *unused)
8980 {
8981 if (ftrace_dump_on_oops)
8982 ftrace_dump(ftrace_dump_on_oops);
8983 return NOTIFY_OK;
8984 }
8985
8986 static struct notifier_block trace_panic_notifier = {
8987 .notifier_call = trace_panic_handler,
8988 .next = NULL,
8989 .priority = 150 /* priority: INT_MAX >= x >= 0 */
8990 };
8991
8992 static int trace_die_handler(struct notifier_block *self,
8993 unsigned long val,
8994 void *data)
8995 {
8996 switch (val) {
8997 case DIE_OOPS:
8998 if (ftrace_dump_on_oops)
8999 ftrace_dump(ftrace_dump_on_oops);
9000 break;
9001 default:
9002 break;
9003 }
9004 return NOTIFY_OK;
9005 }
9006
9007 static struct notifier_block trace_die_notifier = {
9008 .notifier_call = trace_die_handler,
9009 .priority = 200
9010 };
9011
9012 /*
9013 * printk is set to a max of 1024; we really don't need it that big.
9014 * Nothing should be printing 1000 characters anyway.
9015 */
9016 #define TRACE_MAX_PRINT 1000
9017
9018 /*
9019 * Define here KERN_TRACE so that we have one place to modify
9020 * it if we decide to change what log level the ftrace dump
9021 * should be at.
9022 */
9023 #define KERN_TRACE KERN_EMERG
9024
9025 void
9026 trace_printk_seq(struct trace_seq *s)
9027 {
9028 /* Probably should print a warning here. */
9029 if (s->seq.len >= TRACE_MAX_PRINT)
9030 s->seq.len = TRACE_MAX_PRINT;
9031
9032 /*
9033 * More paranoid code. Although the buffer size is set to
9034 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9035 * an extra layer of protection.
9036 */
9037 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9038 s->seq.len = s->seq.size - 1;
9039
9040 /* should be zero ended, but we are paranoid. */
9041 s->buffer[s->seq.len] = 0;
9042
9043 printk(KERN_TRACE "%s", s->buffer);
9044
9045 trace_seq_init(s);
9046 }
9047
9048 void trace_init_global_iter(struct trace_iterator *iter)
9049 {
9050 iter->tr = &global_trace;
9051 iter->trace = iter->tr->current_trace;
9052 iter->cpu_file = RING_BUFFER_ALL_CPUS;
9053 iter->trace_buffer = &global_trace.trace_buffer;
9054
9055 if (iter->trace && iter->trace->open)
9056 iter->trace->open(iter);
9057
9058 /* Annotate start of buffers if we had overruns */
9059 if (ring_buffer_overruns(iter->trace_buffer->buffer))
9060 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9061
9062 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9063 if (trace_clocks[iter->tr->clock_id].in_ns)
9064 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9065 }
9066
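/*
 * Dump the ftrace ring buffer(s) to the console. Called from the panic
 * and die notifiers when ftrace_dump_on_oops is set (and from sysrq-z).
 * Tracing is turned off first, and only one dump may run at a time.
 */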
9067 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9068 {
9069 /* use static because iter can be a bit big for the stack */
9070 static struct trace_iterator iter;
9071 static atomic_t dump_running;
9072 struct trace_array *tr = &global_trace;
9073 unsigned int old_userobj;
9074 unsigned long flags;
9075 int cnt = 0, cpu;
9076
9077 /* Only allow one dump user at a time. */
9078 if (atomic_inc_return(&dump_running) != 1) {
9079 atomic_dec(&dump_running);
9080 return;
9081 }
9082
9083 /*
9084 * Always turn off tracing when we dump.
9085 * We don't need to show trace output of what happens
9086 * between multiple crashes.
9087 *
9088 * If the user does a sysrq-z, then they can re-enable
9089 * tracing with echo 1 > tracing_on.
9090 */
9091 tracing_off();
9092
9093 local_irq_save(flags);
9094 printk_nmi_direct_enter();
9095
9096 /* Simulate the iterator */
9097 trace_init_global_iter(&iter);
9098
9099 for_each_tracing_cpu(cpu) {
9100 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
9101 }
9102
9103 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9104
9105 /* don't look at user memory in panic mode */
9106 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9107
9108 switch (oops_dump_mode) {
9109 case DUMP_ALL:
9110 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9111 break;
9112 case DUMP_ORIG:
9113 iter.cpu_file = raw_smp_processor_id();
9114 break;
9115 case DUMP_NONE:
9116 goto out_enable;
9117 default:
9118 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9119 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9120 }
9121
9122 printk(KERN_TRACE "Dumping ftrace buffer:\n");
9123
9124 /* Did function tracer already get disabled? */
9125 if (ftrace_is_dead()) {
9126 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9127 printk("# MAY BE MISSING FUNCTION EVENTS\n");
9128 }
9129
9130 /*
9131 * We need to stop all tracing on all CPUS to read
9132 * the next buffer. This is a bit expensive, but is
9133 * not done often. We read everything we can,
9134 * and then release the locks again.
9135 */
9136
9137 while (!trace_empty(&iter)) {
9138
9139 if (!cnt)
9140 printk(KERN_TRACE "---------------------------------\n");
9141
9142 cnt++;
9143
9144 trace_iterator_reset(&iter);
9145 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9146
9147 if (trace_find_next_entry_inc(&iter) != NULL) {
9148 int ret;
9149
9150 ret = print_trace_line(&iter);
9151 if (ret != TRACE_TYPE_NO_CONSUME)
9152 trace_consume(&iter);
9153 }
9154 touch_nmi_watchdog();
9155
9156 trace_printk_seq(&iter.seq);
9157 }
9158
9159 if (!cnt)
9160 printk(KERN_TRACE " (ftrace buffer empty)\n");
9161 else
9162 printk(KERN_TRACE "---------------------------------\n");
9163
9164 out_enable:
9165 tr->trace_flags |= old_userobj;
9166
9167 for_each_tracing_cpu(cpu) {
9168 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
9169 }
9170 atomic_dec(&dump_running);
9171 printk_nmi_direct_exit();
9172 local_irq_restore(flags);
9173 }
9174 EXPORT_SYMBOL_GPL(ftrace_dump);
9175
9176 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9177 {
9178 char **argv;
9179 int argc, ret;
9180
9181 argc = 0;
9182 ret = 0;
9183 argv = argv_split(GFP_KERNEL, buf, &argc);
9184 if (!argv)
9185 return -ENOMEM;
9186
9187 if (argc)
9188 ret = createfn(argc, argv);
9189
9190 argv_free(argv);
9191
9192 return ret;
9193 }
9194
9195 #define WRITE_BUFSIZE 4096
9196
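/*
 * Copy a command string from user space in WRITE_BUFSIZE chunks, split
 * it on newlines, strip '#' comments, and run each resulting line
 * through createfn via trace_run_command(). Used by interfaces that
 * take multi-line command input (e.g. the dynamic probe event files).
 */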
9197 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9198 size_t count, loff_t *ppos,
9199 int (*createfn)(int, char **))
9200 {
9201 char *kbuf, *buf, *tmp;
9202 int ret = 0;
9203 size_t done = 0;
9204 size_t size;
9205
9206 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9207 if (!kbuf)
9208 return -ENOMEM;
9209
9210 while (done < count) {
9211 size = count - done;
9212
9213 if (size >= WRITE_BUFSIZE)
9214 size = WRITE_BUFSIZE - 1;
9215
9216 if (copy_from_user(kbuf, buffer + done, size)) {
9217 ret = -EFAULT;
9218 goto out;
9219 }
9220 kbuf[size] = '\0';
9221 buf = kbuf;
9222 do {
9223 tmp = strchr(buf, '\n');
9224 if (tmp) {
9225 *tmp = '\0';
9226 size = tmp - buf + 1;
9227 } else {
9228 size = strlen(buf);
9229 if (done + size < count) {
9230 if (buf != kbuf)
9231 break;
9232 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9233 pr_warn("Line length is too long: Should be less than %d\n",
9234 WRITE_BUFSIZE - 2);
9235 ret = -EINVAL;
9236 goto out;
9237 }
9238 }
9239 done += size;
9240
9241 /* Remove comments */
9242 tmp = strchr(buf, '#');
9243
9244 if (tmp)
9245 *tmp = '\0';
9246
9247 ret = trace_run_command(buf, createfn);
9248 if (ret)
9249 goto out;
9250 buf += size;
9251
9252 } while (done < count);
9253 }
9254 ret = done;
9255
9256 out:
9257 kfree(kbuf);
9258
9259 return ret;
9260 }
9261
9262 __init static int tracer_alloc_buffers(void)
9263 {
9264 int ring_buf_size;
9265 int ret = -ENOMEM;
9266
9267
9268 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9269 pr_warning("Tracing disabled due to lockdown\n");
9270 return -EPERM;
9271 }
9272
9273 /*
9274 * Make sure we don't accidentally add more trace options
9275 * than we have bits for.
9276 */
9277 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9278
9279 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9280 goto out;
9281
9282 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9283 goto out_free_buffer_mask;
9284
9285 /* Only allocate trace_printk buffers if a trace_printk exists */
9286 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9287 /* Must be called before global_trace.buffer is allocated */
9288 trace_printk_init_buffers();
9289
9290 /* To save memory, keep the ring buffer size to its minimum */
9291 if (ring_buffer_expanded)
9292 ring_buf_size = trace_buf_size;
9293 else
9294 ring_buf_size = 1;
9295
9296 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9297 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9298
9299 raw_spin_lock_init(&global_trace.start_lock);
9300
9301 /*
9302 * The prepare callback allocates some memory for the ring buffer. We
9303 * don't free the buffer if the CPU goes down. If we were to free
9304 * the buffer, then the user would lose any trace that was in the
9305 * buffer. The memory will be removed once the "instance" is removed.
9306 */
9307 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9308 "trace/RB:preapre", trace_rb_cpu_prepare,
9309 NULL);
9310 if (ret < 0)
9311 goto out_free_cpumask;
9312 /* Used for event triggers */
9313 ret = -ENOMEM;
9314 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9315 if (!temp_buffer)
9316 goto out_rm_hp_state;
9317
9318 if (trace_create_savedcmd() < 0)
9319 goto out_free_temp_buffer;
9320
9321 /* TODO: make the number of buffers hot pluggable with CPUS */
9322 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9323 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
9324 WARN_ON(1);
9325 goto out_free_savedcmd;
9326 }
9327
9328 if (global_trace.buffer_disabled)
9329 tracing_off();
9330
9331 if (trace_boot_clock) {
9332 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9333 if (ret < 0)
9334 pr_warn("Trace clock %s not defined, going back to default\n",
9335 trace_boot_clock);
9336 }
9337
9338 /*
9339 * register_tracer() might reference current_trace, so it
9340 * needs to be set before we register anything. This is
9341 * just a bootstrap of current_trace anyway.
9342 */
9343 global_trace.current_trace = &nop_trace;
9344
9345 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9346
9347 ftrace_init_global_array_ops(&global_trace);
9348
9349 init_trace_flags_index(&global_trace);
9350
9351 register_tracer(&nop_trace);
9352
9353 /* Function tracing may start here (via kernel command line) */
9354 init_function_trace();
9355
9356 /* All seems OK, enable tracing */
9357 tracing_disabled = 0;
9358
9359 atomic_notifier_chain_register(&panic_notifier_list,
9360 &trace_panic_notifier);
9361
9362 register_die_notifier(&trace_die_notifier);
9363
9364 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9365
9366 INIT_LIST_HEAD(&global_trace.systems);
9367 INIT_LIST_HEAD(&global_trace.events);
9368 INIT_LIST_HEAD(&global_trace.hist_vars);
9369 INIT_LIST_HEAD(&global_trace.err_log);
9370 list_add(&global_trace.list, &ftrace_trace_arrays);
9371
9372 apply_trace_boot_options();
9373
9374 register_snapshot_cmd();
9375
9376 return 0;
9377
9378 out_free_savedcmd:
9379 free_saved_cmdlines_buffer(savedcmd);
9380 out_free_temp_buffer:
9381 ring_buffer_free(temp_buffer);
9382 out_rm_hp_state:
9383 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9384 out_free_cpumask:
9385 free_cpumask_var(global_trace.tracing_cpumask);
9386 out_free_buffer_mask:
9387 free_cpumask_var(tracing_buffer_mask);
9388 out:
9389 return ret;
9390 }
9391
9392 void __init early_trace_init(void)
9393 {
9394 if (tracepoint_printk) {
9395 tracepoint_print_iter =
9396 kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9397 if (WARN_ON(!tracepoint_print_iter))
9398 tracepoint_printk = 0;
9399 else
9400 static_key_enable(&tracepoint_printk_key.key);
9401 }
9402 tracer_alloc_buffers();
9403
9404 init_events();
9405 }
9406
9407 void __init trace_init(void)
9408 {
9409 trace_event_init();
9410 }
9411
9412 __init static int clear_boot_tracer(void)
9413 {
9414 /*
9415 * The default bootup tracer name points into an init section.
9416 * This function is called at late_initcall time. If we did not
9417 * find the boot tracer, then clear it out, to prevent
9418 * later registration from accessing the buffer that is
9419 * about to be freed.
9420 */
9421 if (!default_bootup_tracer)
9422 return 0;
9423
9424 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9425 default_bootup_tracer);
9426 default_bootup_tracer = NULL;
9427
9428 return 0;
9429 }
9430
9431 fs_initcall(tracer_init_tracefs);
9432 late_initcall_sync(clear_boot_tracer);
9433
9434 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9435 __init static int tracing_set_default_clock(void)
9436 {
9437 /* sched_clock_stable() is determined in late_initcall */
9438 if (!trace_boot_clock && !sched_clock_stable()) {
9439 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9440 pr_warn("Can not set tracing clock due to lockdown\n");
9441 return -EPERM;
9442 }
9443
9444 printk(KERN_WARNING
9445 "Unstable clock detected, switching default tracing clock to \"global\"\n"
9446 "If you want to keep using the local clock, then add:\n"
9447 " \"trace_clock=local\"\n"
9448 "on the kernel command line\n");
9449 tracing_set_clock(&global_trace, "global");
9450 }
9451
9452 return 0;
9453 }
9454 late_initcall_sync(tracing_set_default_clock);
9455 #endif
9456