1 /*
2 * ring buffer based function tracer
3 *
4 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Originally taken from the RT patch by:
8 * Arnaldo Carvalho de Melo <acme@redhat.com>
9 *
10 * Based on code from the latency_tracer, that is:
11 * Copyright (C) 2004-2006 Ingo Molnar
12 * Copyright (C) 2004 Nadia Yvette Chambers
13 */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49 * On boot up, the ring buffer is set to the minimum size, so that
50 * we do not waste memory on systems that are not using tracing.
51 */
52 bool ring_buffer_expanded;
53
54 /*
55 * We need to change this state when a selftest is running.
56 * A selftest will look into the ring-buffer to count the
57 * entries inserted during the selftest, but concurrent
58 * insertions into the ring-buffer, such as trace_printk, could occur
59 * at the same time, giving false positive or negative results.
60 */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64 * If a tracer is running, we do not want to run SELFTEST.
65 */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74 { }
75 };
76
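/*
 * Used as the default ->set_flag() callback for tracers that do not
 * provide their own (see register_tracer()); it accepts any flag
 * change and simply reports success.
 */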
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80 return 0;
81 }
82
83 /*
84 * To prevent the comm cache from being overwritten when no
85 * tracing is active, only save the comm when a trace event
86 * occurred.
87 */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89
90 /*
91 * Kill all tracing for good (never come back).
92 * It is initialized to 1 and will be set to zero if the initialization
93 * of the tracer is successful. That is the only place that sets
94 * it back to zero.
95 */
96 static int tracing_disabled = 1;
97
98 cpumask_var_t __read_mostly tracing_buffer_mask;
99
100 /*
101 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102 *
103 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104 * is set, then ftrace_dump is called. This will output the contents
105 * of the ftrace buffers to the console. This is very useful for
106 * capturing traces that lead to crashes and outputting them to a
107 * serial console.
108 *
109 * It is off by default, but you can enable it either by specifying
110 * "ftrace_dump_on_oops" on the kernel command line, or by setting
111 * /proc/sys/kernel/ftrace_dump_on_oops
112 * Set 1 if you want to dump buffers of all CPUs
113 * Set 2 if you want to dump the buffer of the CPU that triggered oops
114 */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124 struct module *mod;
125 unsigned long length;
126 };
127
128 union trace_enum_map_item;
129
130 struct trace_enum_map_tail {
131 /*
132 * "end" is first and points to NULL as it must be different
133 * than "mod" or "enum_string"
134 */
135 union trace_enum_map_item *next;
136 const char *end; /* points to NULL */
137 };
138
139 static DEFINE_MUTEX(trace_enum_mutex);
140
141 /*
142 * The trace_enum_maps are saved in an array with two extra elements,
143 * one at the beginning, and one at the end. The beginning item contains
144 * the count of the saved maps (head.length), and the module they
145 * belong to if not built in (head.mod). The ending item contains a
146 * pointer to the next array of saved enum_map items.
147 */
148 union trace_enum_map_item {
149 struct trace_enum_map map;
150 struct trace_enum_map_head head;
151 struct trace_enum_map_tail tail;
152 };
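/*
 * Illustrative layout of one saved array, per the description above
 * (a sketch, not an additional data structure):
 *
 *   [ head | map 0 | map 1 | ... | map N-1 | tail ]
 *
 * where head.length == N, head.mod is the owning module if not built
 * in, and tail.next points to the next saved array, if any.
 */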
153
154 static union trace_enum_map_item *trace_enum_maps;
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158
159 #define MAX_TRACER_SIZE 100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162
163 static bool allocate_snapshot;
164
165 static int __init set_cmdline_ftrace(char *str)
166 {
167 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168 default_bootup_tracer = bootup_tracer_buf;
169 /* We are using ftrace early, expand it */
170 ring_buffer_expanded = true;
171 return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177 if (*str++ != '=' || !*str) {
178 ftrace_dump_on_oops = DUMP_ALL;
179 return 1;
180 }
181
182 if (!strcmp("orig_cpu", str)) {
183 ftrace_dump_on_oops = DUMP_ORIG;
184 return 1;
185 }
186
187 return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
190
191 static int __init stop_trace_on_warning(char *str)
192 {
193 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194 __disable_trace_on_warning = 1;
195 return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198
199 static int __init boot_alloc_snapshot(char *str)
200 {
201 allocate_snapshot = true;
202 /* We also need the main ring buffer expanded */
203 ring_buffer_expanded = true;
204 return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207
208
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210
211 static int __init set_trace_boot_options(char *str)
212 {
213 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214 return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220
221 static int __init set_trace_boot_clock(char *str)
222 {
223 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224 trace_boot_clock = trace_boot_clock_buf;
225 return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228
229 static int __init set_tracepoint_printk(char *str)
230 {
231 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
232 tracepoint_printk = 1;
233 return 1;
234 }
235 __setup("tp_printk", set_tracepoint_printk);
236
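/* Convert nanoseconds to microseconds, rounding to the nearest microsecond. */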
237 unsigned long long ns2usecs(cycle_t nsec)
238 {
239 nsec += 500;
240 do_div(nsec, 1000);
241 return nsec;
242 }
243
244 /* trace_flags holds trace_options default values */
245 #define TRACE_DEFAULT_FLAGS \
246 (FUNCTION_DEFAULT_FLAGS | \
247 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
248 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
249 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
250 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
251
252 /* trace_options that are only supported by global_trace */
253 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
254 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
255
256 /* trace_flags that are default zero for instances */
257 #define ZEROED_TRACE_FLAGS \
258 TRACE_ITER_EVENT_FORK
259
260 /*
261 * The global_trace is the descriptor that holds the tracing
262 * buffers for the live tracing. For each CPU, it contains
263 * a linked list of pages that will store trace entries. The
264 * page descriptor of the pages in the memory is used to hold
265 * the linked list by linking the lru item in the page descriptor
266 * to each of the pages in the buffer per CPU.
267 *
268 * For each active CPU there is a data field that holds the
269 * pages for the buffer for that CPU. Each CPU has the same number
270 * of pages allocated for its buffer.
271 */
272 static struct trace_array global_trace = {
273 .trace_flags = TRACE_DEFAULT_FLAGS,
274 };
275
276 LIST_HEAD(ftrace_trace_arrays);
277
278 int trace_array_get(struct trace_array *this_tr)
279 {
280 struct trace_array *tr;
281 int ret = -ENODEV;
282
283 mutex_lock(&trace_types_lock);
284 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285 if (tr == this_tr) {
286 tr->ref++;
287 ret = 0;
288 break;
289 }
290 }
291 mutex_unlock(&trace_types_lock);
292
293 return ret;
294 }
295
296 static void __trace_array_put(struct trace_array *this_tr)
297 {
298 WARN_ON(!this_tr->ref);
299 this_tr->ref--;
300 }
301
302 void trace_array_put(struct trace_array *this_tr)
303 {
304 mutex_lock(&trace_types_lock);
305 __trace_array_put(this_tr);
306 mutex_unlock(&trace_types_lock);
307 }
308
309 int call_filter_check_discard(struct trace_event_call *call, void *rec,
310 struct ring_buffer *buffer,
311 struct ring_buffer_event *event)
312 {
313 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
314 !filter_match_preds(call->filter, rec)) {
315 __trace_event_discard_commit(buffer, event);
316 return 1;
317 }
318
319 return 0;
320 }
321
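/**
 * trace_free_pid_list - free the memory held by a trace_pid_list
 * @pid_list: The pid list to free
 *
 * Frees both the pid bitmask and the trace_pid_list structure itself.
 */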
322 void trace_free_pid_list(struct trace_pid_list *pid_list)
323 {
324 vfree(pid_list->pids);
325 kfree(pid_list);
326 }
327
328 /**
329 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
330 * @filtered_pids: The list of pids to check
331 * @search_pid: The PID to find in @filtered_pids
332 *
333 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
334 */
335 bool
336 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
337 {
338 /*
339 * If pid_max changed after filtered_pids was created, we
340 * by default ignore all pids greater than the previous pid_max.
341 */
342 if (search_pid >= filtered_pids->pid_max)
343 return false;
344
345 return test_bit(search_pid, filtered_pids->pids);
346 }
347
348 /**
349 * trace_ignore_this_task - should a task be ignored for tracing
350 * @filtered_pids: The list of pids to check
351 * @task: The task that should be ignored if not filtered
352 *
353 * Checks if @task should be traced or not from @filtered_pids.
354 * Returns true if @task should *NOT* be traced.
355 * Returns false if @task should be traced.
356 */
357 bool
358 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
359 {
360 /*
361 * Return false, because if filtered_pids does not exist,
362 * all pids are good to trace.
363 */
364 if (!filtered_pids)
365 return false;
366
367 return !trace_find_filtered_pid(filtered_pids, task->pid);
368 }
369
370 /**
371 * trace_pid_filter_add_remove - Add or remove a task from a pid_list
372 * @pid_list: The list to modify
373 * @self: The current task for fork or NULL for exit
374 * @task: The task to add or remove
375 *
376 * If adding a task, if @self is defined, the task is only added if @self
377 * is also included in @pid_list. This happens on fork and tasks should
378 * only be added when the parent is listed. If @self is NULL, then the
379 * @task pid will be removed from the list, which would happen on exit
380 * of a task.
381 */
382 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
383 struct task_struct *self,
384 struct task_struct *task)
385 {
386 if (!pid_list)
387 return;
388
389 /* For forks, we only add if the forking task is listed */
390 if (self) {
391 if (!trace_find_filtered_pid(pid_list, self->pid))
392 return;
393 }
394
395 /* Sorry, but we don't support pid_max changing after setting */
396 if (task->pid >= pid_list->pid_max)
397 return;
398
399 /* "self" is set for forks, and NULL for exits */
400 if (self)
401 set_bit(task->pid, pid_list->pids);
402 else
403 clear_bit(task->pid, pid_list->pids);
404 }
405
406 /**
407 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
408 * @pid_list: The pid list to show
409 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
410 * @pos: The position of the file
411 *
412 * This is used by the seq_file "next" operation to iterate the pids
413 * listed in a trace_pid_list structure.
414 *
415 * Returns the pid+1 as we want to display pid of zero, but NULL would
416 * stop the iteration.
417 */
418 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
419 {
420 unsigned long pid = (unsigned long)v;
421
422 (*pos)++;
423
424 /* pid is already +1 of the actual previous bit */
425 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
426
427 /* Return pid + 1 to allow zero to be represented */
428 if (pid < pid_list->pid_max)
429 return (void *)(pid + 1);
430
431 return NULL;
432 }
433
434 /**
435 * trace_pid_start - Used for seq_file to start reading pid lists
436 * @pid_list: The pid list to show
437 * @pos: The position of the file
438 *
439 * This is used by seq_file "start" operation to start the iteration
440 * of listing pids.
441 *
442 * Returns the pid+1 as we want to display pid of zero, but NULL would
443 * stop the iteration.
444 */
445 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
446 {
447 unsigned long pid;
448 loff_t l = 0;
449
450 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
451 if (pid >= pid_list->pid_max)
452 return NULL;
453
454 /* Return pid + 1 so that zero can be the exit value */
455 for (pid++; pid && l < *pos;
456 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
457 ;
458 return (void *)pid;
459 }
460
461 /**
462 * trace_pid_show - show the current pid in seq_file processing
463 * @m: The seq_file structure to write into
464 * @v: A void pointer of the pid (+1) value to display
465 *
466 * Can be directly used by seq_file operations to display the current
467 * pid value.
468 */
469 int trace_pid_show(struct seq_file *m, void *v)
470 {
471 unsigned long pid = (unsigned long)v - 1;
472
473 seq_printf(m, "%lu\n", pid);
474 return 0;
475 }
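/*
 * Illustrative sketch (not part of this file) of how the three helpers
 * above are meant to plug into a seq_file, assuming hypothetical
 * p_start/p_next/p_stop wrappers that pass the file's pid_list through:
 *
 *	static const struct seq_operations example_pid_sops = {
 *		.start	= p_start,	(calls trace_pid_start())
 *		.next	= p_next,	(calls trace_pid_next())
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */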
476
477 /* 128 should be much more than enough */
478 #define PID_BUF_SIZE 127
479
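/**
 * trace_pid_write - parse a user supplied list of pids into a new pid list
 * @filtered_pids: The current pid list (or NULL); its bits are carried over
 * @new_pid_list: Where to store the newly allocated list (NULL if cleared)
 * @ubuf: The user space buffer holding whitespace separated pid numbers
 * @cnt: The number of bytes in @ubuf
 *
 * Returns the number of bytes read, or a negative error code on failure.
 */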
480 int trace_pid_write(struct trace_pid_list *filtered_pids,
481 struct trace_pid_list **new_pid_list,
482 const char __user *ubuf, size_t cnt)
483 {
484 struct trace_pid_list *pid_list;
485 struct trace_parser parser;
486 unsigned long val;
487 int nr_pids = 0;
488 ssize_t read = 0;
489 ssize_t ret = 0;
490 loff_t pos;
491 pid_t pid;
492
493 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
494 return -ENOMEM;
495
496 /*
497 * Always recreate a new array. The write is an all or nothing
498 * operation. A new array is always created when the user adds new
499 * pids. If the operation fails, then the current list is
500 * not modified.
501 */
502 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
503 if (!pid_list)
504 return -ENOMEM;
505
506 pid_list->pid_max = READ_ONCE(pid_max);
507
508 /* Only truncating will shrink pid_max */
509 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
510 pid_list->pid_max = filtered_pids->pid_max;
511
512 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
513 if (!pid_list->pids) {
514 kfree(pid_list);
515 return -ENOMEM;
516 }
517
518 if (filtered_pids) {
519 /* copy the current bits to the new max */
520 for_each_set_bit(pid, filtered_pids->pids,
521 filtered_pids->pid_max) {
522 set_bit(pid, pid_list->pids);
523 nr_pids++;
524 }
525 }
526
527 while (cnt > 0) {
528
529 pos = 0;
530
531 ret = trace_get_user(&parser, ubuf, cnt, &pos);
532 if (ret < 0 || !trace_parser_loaded(&parser))
533 break;
534
535 read += ret;
536 ubuf += ret;
537 cnt -= ret;
538
539 parser.buffer[parser.idx] = 0;
540
541 ret = -EINVAL;
542 if (kstrtoul(parser.buffer, 0, &val))
543 break;
544 if (val >= pid_list->pid_max)
545 break;
546
547 pid = (pid_t)val;
548
549 set_bit(pid, pid_list->pids);
550 nr_pids++;
551
552 trace_parser_clear(&parser);
553 ret = 0;
554 }
555 trace_parser_put(&parser);
556
557 if (ret < 0) {
558 trace_free_pid_list(pid_list);
559 return ret;
560 }
561
562 if (!nr_pids) {
563 /* Cleared the list of pids */
564 trace_free_pid_list(pid_list);
565 read = ret;
566 pid_list = NULL;
567 }
568
569 *new_pid_list = pid_list;
570
571 return read;
572 }
573
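/*
 * Return the normalized time stamp of @buf for @cpu, falling back to
 * trace_clock_local() early in boot before the ring buffer exists.
 */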
574 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
575 {
576 u64 ts;
577
578 /* Early boot up does not have a buffer yet */
579 if (!buf->buffer)
580 return trace_clock_local();
581
582 ts = ring_buffer_time_stamp(buf->buffer, cpu);
583 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
584
585 return ts;
586 }
587
588 cycle_t ftrace_now(int cpu)
589 {
590 return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
591 }
592
593 /**
594 * tracing_is_enabled - Show if global_trace has been disabled
595 *
596 * Shows if the global trace has been enabled or not. It uses the
597 * mirror flag "buffer_disabled" to be used in fast paths such as for
598 * the irqsoff tracer. But it may be inaccurate due to races. If you
599 * need to know the accurate state, use tracing_is_on() which is a little
600 * slower, but accurate.
601 */
602 int tracing_is_enabled(void)
603 {
604 /*
605 * For quick access (irqsoff uses this in fast path), just
606 * return the mirror variable of the state of the ring buffer.
607 * It's a little racy, but we don't really care.
608 */
609 smp_rmb();
610 return !global_trace.buffer_disabled;
611 }
612
613 /*
614 * trace_buf_size is the size in bytes that is allocated
615 * for a buffer. Note, the number of bytes is always rounded
616 * to page size.
617 *
618 * This number is purposely set to a low value of 16384.
619 * If a dump on oops happens, it is much nicer not to have to
620 * wait for all that output. In any case, this is configurable at
621 * both boot time and run time.
622 */
623 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
624
625 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
626
627 /* trace_types holds a link list of available tracers. */
628 static struct tracer *trace_types __read_mostly;
629
630 /*
631 * trace_types_lock is used to protect the trace_types list.
632 */
633 DEFINE_MUTEX(trace_types_lock);
634
635 /*
636 * serialize the access of the ring buffer
637 *
638 * The ring buffer serializes readers, but that is only low level protection.
639 * The validity of the events (returned by ring_buffer_peek() etc.)
640 * is not protected by the ring buffer.
641 *
642 * The content of events may become garbage if we allow other processes to
643 * consume these events concurrently:
644 * A) the page of the consumed events may become a normal page
645 * (not a reader page) in the ring buffer, and this page will be rewritten
646 * by the events producer.
647 * B) The page of the consumed events may become a page for splice_read,
648 * and this page will be returned to the system.
649 *
650 * These primitives allow multiple processes to access different cpu ring
651 * buffers concurrently.
652 *
653 * These primitives don't distinguish read-only and read-consume access.
654 * Multiple read-only accesses are also serialized.
655 */
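/*
 * Typical reader-side pattern (illustrative only; @cpu may also be
 * RING_BUFFER_ALL_CPUS to serialize against all per-cpu readers):
 *
 *	trace_access_lock(cpu);
 *	... consume events from that cpu's ring buffer ...
 *	trace_access_unlock(cpu);
 */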
656
657 #ifdef CONFIG_SMP
658 static DECLARE_RWSEM(all_cpu_access_lock);
659 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
660
661 static inline void trace_access_lock(int cpu)
662 {
663 if (cpu == RING_BUFFER_ALL_CPUS) {
664 /* gain it for accessing the whole ring buffer. */
665 down_write(&all_cpu_access_lock);
666 } else {
667 /* gain it for accessing a cpu ring buffer. */
668
669 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
670 down_read(&all_cpu_access_lock);
671
672 /* Secondly block other access to this @cpu ring buffer. */
673 mutex_lock(&per_cpu(cpu_access_lock, cpu));
674 }
675 }
676
677 static inline void trace_access_unlock(int cpu)
678 {
679 if (cpu == RING_BUFFER_ALL_CPUS) {
680 up_write(&all_cpu_access_lock);
681 } else {
682 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
683 up_read(&all_cpu_access_lock);
684 }
685 }
686
687 static inline void trace_access_lock_init(void)
688 {
689 int cpu;
690
691 for_each_possible_cpu(cpu)
692 mutex_init(&per_cpu(cpu_access_lock, cpu));
693 }
694
695 #else
696
697 static DEFINE_MUTEX(access_lock);
698
699 static inline void trace_access_lock(int cpu)
700 {
701 (void)cpu;
702 mutex_lock(&access_lock);
703 }
704
705 static inline void trace_access_unlock(int cpu)
706 {
707 (void)cpu;
708 mutex_unlock(&access_lock);
709 }
710
711 static inline void trace_access_lock_init(void)
712 {
713 }
714
715 #endif
716
717 #ifdef CONFIG_STACKTRACE
718 static void __ftrace_trace_stack(struct ring_buffer *buffer,
719 unsigned long flags,
720 int skip, int pc, struct pt_regs *regs);
721 static inline void ftrace_trace_stack(struct trace_array *tr,
722 struct ring_buffer *buffer,
723 unsigned long flags,
724 int skip, int pc, struct pt_regs *regs);
725
726 #else
727 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
728 unsigned long flags,
729 int skip, int pc, struct pt_regs *regs)
730 {
731 }
732 static inline void ftrace_trace_stack(struct trace_array *tr,
733 struct ring_buffer *buffer,
734 unsigned long flags,
735 int skip, int pc, struct pt_regs *regs)
736 {
737 }
738
739 #endif
740
741 static void tracer_tracing_on(struct trace_array *tr)
742 {
743 if (tr->trace_buffer.buffer)
744 ring_buffer_record_on(tr->trace_buffer.buffer);
745 /*
746 * This flag is looked at when buffers haven't been allocated
747 * yet, or by some tracers (like irqsoff) that just want to
748 * know if the ring buffer has been disabled, but can handle
749 * races where it gets disabled while we still do a record.
750 * As the check is in the fast path of the tracers, it is more
751 * important to be fast than accurate.
752 */
753 tr->buffer_disabled = 0;
754 /* Make the flag seen by readers */
755 smp_wmb();
756 }
757
758 /**
759 * tracing_on - enable tracing buffers
760 *
761 * This function enables tracing buffers that may have been
762 * disabled with tracing_off.
763 */
764 void tracing_on(void)
765 {
766 tracer_tracing_on(&global_trace);
767 }
768 EXPORT_SYMBOL_GPL(tracing_on);
769
770 /**
771 * __trace_puts - write a constant string into the trace buffer.
772 * @ip: The address of the caller
773 * @str: The constant string to write
774 * @size: The size of the string.
775 */
776 int __trace_puts(unsigned long ip, const char *str, int size)
777 {
778 struct ring_buffer_event *event;
779 struct ring_buffer *buffer;
780 struct print_entry *entry;
781 unsigned long irq_flags;
782 int alloc;
783 int pc;
784
785 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
786 return 0;
787
788 pc = preempt_count();
789
790 if (unlikely(tracing_selftest_running || tracing_disabled))
791 return 0;
792
793 alloc = sizeof(*entry) + size + 2; /* possible \n added */
794
795 local_save_flags(irq_flags);
796 buffer = global_trace.trace_buffer.buffer;
797 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
798 irq_flags, pc);
799 if (!event)
800 return 0;
801
802 entry = ring_buffer_event_data(event);
803 entry->ip = ip;
804
805 memcpy(&entry->buf, str, size);
806
807 /* Add a newline if necessary */
808 if (entry->buf[size - 1] != '\n') {
809 entry->buf[size] = '\n';
810 entry->buf[size + 1] = '\0';
811 } else
812 entry->buf[size] = '\0';
813
814 __buffer_unlock_commit(buffer, event);
815 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
816
817 return size;
818 }
819 EXPORT_SYMBOL_GPL(__trace_puts);
820
821 /**
822 * __trace_bputs - write the pointer to a constant string into trace buffer
823 * @ip: The address of the caller
824 * @str: The constant string to write to the buffer to
825 */
826 int __trace_bputs(unsigned long ip, const char *str)
827 {
828 struct ring_buffer_event *event;
829 struct ring_buffer *buffer;
830 struct bputs_entry *entry;
831 unsigned long irq_flags;
832 int size = sizeof(struct bputs_entry);
833 int pc;
834
835 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
836 return 0;
837
838 pc = preempt_count();
839
840 if (unlikely(tracing_selftest_running || tracing_disabled))
841 return 0;
842
843 local_save_flags(irq_flags);
844 buffer = global_trace.trace_buffer.buffer;
845 event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
846 irq_flags, pc);
847 if (!event)
848 return 0;
849
850 entry = ring_buffer_event_data(event);
851 entry->ip = ip;
852 entry->str = str;
853
854 __buffer_unlock_commit(buffer, event);
855 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
856
857 return 1;
858 }
859 EXPORT_SYMBOL_GPL(__trace_bputs);
860
861 #ifdef CONFIG_TRACER_SNAPSHOT
862 /**
863 * trace_snapshot - take a snapshot of the current buffer.
864 *
865 * This causes a swap between the snapshot buffer and the current live
866 * tracing buffer. You can use this to take snapshots of the live
867 * trace when some condition is triggered, but continue to trace.
868 *
869 * Note, make sure to allocate the snapshot either with
870 * tracing_snapshot_alloc(), or manually with:
871 * echo 1 > /sys/kernel/debug/tracing/snapshot
872 *
873 * If the snapshot buffer is not allocated, it will stop tracing.
874 * Basically making a permanent snapshot.
875 */
876 void tracing_snapshot(void)
877 {
878 struct trace_array *tr = &global_trace;
879 struct tracer *tracer = tr->current_trace;
880 unsigned long flags;
881
882 if (in_nmi()) {
883 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
884 internal_trace_puts("*** snapshot is being ignored ***\n");
885 return;
886 }
887
888 if (!tr->allocated_snapshot) {
889 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
890 internal_trace_puts("*** stopping trace here! ***\n");
891 tracing_off();
892 return;
893 }
894
895 /* Note, snapshot can not be used when the tracer uses it */
896 if (tracer->use_max_tr) {
897 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
898 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
899 return;
900 }
901
902 local_irq_save(flags);
903 update_max_tr(tr, current, smp_processor_id());
904 local_irq_restore(flags);
905 }
906 EXPORT_SYMBOL_GPL(tracing_snapshot);
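/*
 * Illustrative use (a sketch, not code in this file): allocate the
 * snapshot buffer from a context that may sleep, then trigger snapshots
 * later from atomic context (but not from NMI context):
 *
 *	tracing_alloc_snapshot();
 *	...
 *	if (condition_of_interest)
 *		tracing_snapshot();
 */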
907
908 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
909 struct trace_buffer *size_buf, int cpu_id);
910 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
911
912 static int alloc_snapshot(struct trace_array *tr)
913 {
914 int ret;
915
916 if (!tr->allocated_snapshot) {
917
918 /* allocate spare buffer */
919 ret = resize_buffer_duplicate_size(&tr->max_buffer,
920 &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
921 if (ret < 0)
922 return ret;
923
924 tr->allocated_snapshot = true;
925 }
926
927 return 0;
928 }
929
930 static void free_snapshot(struct trace_array *tr)
931 {
932 /*
933 * We don't free the ring buffer. Instead, we resize it because the
934 * max_tr ring buffer has some state (e.g. ring->clock) and
935 * we want to preserve it.
936 */
937 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
938 set_buffer_entries(&tr->max_buffer, 1);
939 tracing_reset_online_cpus(&tr->max_buffer);
940 tr->allocated_snapshot = false;
941 }
942
943 /**
944 * tracing_alloc_snapshot - allocate snapshot buffer.
945 *
946 * This only allocates the snapshot buffer if it isn't already
947 * allocated - it doesn't also take a snapshot.
948 *
949 * This is meant to be used in cases where the snapshot buffer needs
950 * to be set up for events that can't sleep but need to be able to
951 * trigger a snapshot.
952 */
953 int tracing_alloc_snapshot(void)
954 {
955 struct trace_array *tr = &global_trace;
956 int ret;
957
958 ret = alloc_snapshot(tr);
959 WARN_ON(ret < 0);
960
961 return ret;
962 }
963 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
964
965 /**
966 * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
967 *
968 * This is similar to trace_snapshot(), but it will allocate the
969 * snapshot buffer if it isn't already allocated. Use this only
970 * where it is safe to sleep, as the allocation may sleep.
971 *
972 * This causes a swap between the snapshot buffer and the current live
973 * tracing buffer. You can use this to take snapshots of the live
974 * trace when some condition is triggered, but continue to trace.
975 */
976 void tracing_snapshot_alloc(void)
977 {
978 int ret;
979
980 ret = tracing_alloc_snapshot();
981 if (ret < 0)
982 return;
983
984 tracing_snapshot();
985 }
986 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
987 #else
988 void tracing_snapshot(void)
989 {
990 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
991 }
992 EXPORT_SYMBOL_GPL(tracing_snapshot);
993 int tracing_alloc_snapshot(void)
994 {
995 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
996 return -ENODEV;
997 }
998 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
999 void tracing_snapshot_alloc(void)
1000 {
1001 /* Give warning */
1002 tracing_snapshot();
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1005 #endif /* CONFIG_TRACER_SNAPSHOT */
1006
1007 static void tracer_tracing_off(struct trace_array *tr)
1008 {
1009 if (tr->trace_buffer.buffer)
1010 ring_buffer_record_off(tr->trace_buffer.buffer);
1011 /*
1012 * This flag is looked at when buffers haven't been allocated
1013 * yet, or by some tracers (like irqsoff) that just want to
1014 * know if the ring buffer has been disabled, but can handle
1015 * races where it gets disabled while we still do a record.
1016 * As the check is in the fast path of the tracers, it is more
1017 * important to be fast than accurate.
1018 */
1019 tr->buffer_disabled = 1;
1020 /* Make the flag seen by readers */
1021 smp_wmb();
1022 }
1023
1024 /**
1025 * tracing_off - turn off tracing buffers
1026 *
1027 * This function stops the tracing buffers from recording data.
1028 * It does not disable any overhead the tracers themselves may
1029 * be causing. This function simply causes all recording to
1030 * the ring buffers to fail.
1031 */
1032 void tracing_off(void)
1033 {
1034 tracer_tracing_off(&global_trace);
1035 }
1036 EXPORT_SYMBOL_GPL(tracing_off);
1037
1038 void disable_trace_on_warning(void)
1039 {
1040 if (__disable_trace_on_warning)
1041 tracing_off();
1042 }
1043
1044 /**
1045 * tracer_tracing_is_on - show real state of ring buffer enabled
1046 * @tr : the trace array to know if ring buffer is enabled
1047 *
1048 * Shows real state of the ring buffer if it is enabled or not.
1049 */
1050 int tracer_tracing_is_on(struct trace_array *tr)
1051 {
1052 if (tr->trace_buffer.buffer)
1053 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1054 return !tr->buffer_disabled;
1055 }
1056
1057 /**
1058 * tracing_is_on - show state of ring buffers enabled
1059 */
1060 int tracing_is_on(void)
1061 {
1062 return tracer_tracing_is_on(&global_trace);
1063 }
1064 EXPORT_SYMBOL_GPL(tracing_is_on);
1065
1066 static int __init set_buf_size(char *str)
1067 {
1068 unsigned long buf_size;
1069
1070 if (!str)
1071 return 0;
1072 buf_size = memparse(str, &str);
1073 /* nr_entries can not be zero */
1074 if (buf_size == 0)
1075 return 0;
1076 trace_buf_size = buf_size;
1077 return 1;
1078 }
1079 __setup("trace_buf_size=", set_buf_size);
1080
1081 static int __init set_tracing_thresh(char *str)
1082 {
1083 unsigned long threshold;
1084 int ret;
1085
1086 if (!str)
1087 return 0;
1088 ret = kstrtoul(str, 0, &threshold);
1089 if (ret < 0)
1090 return 0;
1091 tracing_thresh = threshold * 1000;
1092 return 1;
1093 }
1094 __setup("tracing_thresh=", set_tracing_thresh);
1095
1096 unsigned long nsecs_to_usecs(unsigned long nsecs)
1097 {
1098 return nsecs / 1000;
1099 }
1100
1101 /*
1102 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1103 * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1104 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1105 * of strings in the order that the enums were defined.
1106 */
1107 #undef C
1108 #define C(a, b) b
1109
1110 /* These must match the bit positions in trace_iterator_flags */
1111 static const char *trace_options[] = {
1112 TRACE_FLAGS
1113 NULL
1114 };
1115
1116 static struct {
1117 u64 (*func)(void);
1118 const char *name;
1119 int in_ns; /* is this clock in nanoseconds? */
1120 } trace_clocks[] = {
1121 { trace_clock_local, "local", 1 },
1122 { trace_clock_global, "global", 1 },
1123 { trace_clock_counter, "counter", 0 },
1124 { trace_clock_jiffies, "uptime", 0 },
1125 { trace_clock, "perf", 1 },
1126 { ktime_get_mono_fast_ns, "mono", 1 },
1127 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1128 { ktime_get_boot_fast_ns, "boot", 1 },
1129 ARCH_TRACE_CLOCKS
1130 };
1131
1132 /*
1133 * trace_parser_get_init - gets the buffer for trace parser
1134 */
1135 int trace_parser_get_init(struct trace_parser *parser, int size)
1136 {
1137 memset(parser, 0, sizeof(*parser));
1138
1139 parser->buffer = kmalloc(size, GFP_KERNEL);
1140 if (!parser->buffer)
1141 return 1;
1142
1143 parser->size = size;
1144 return 0;
1145 }
1146
1147 /*
1148 * trace_parser_put - frees the buffer for trace parser
1149 */
1150 void trace_parser_put(struct trace_parser *parser)
1151 {
1152 kfree(parser->buffer);
1153 }
1154
1155 /*
1156 * trace_get_user - reads the user input string separated by space
1157 * (matched by isspace(ch))
1158 *
1159 * For each string found the 'struct trace_parser' is updated,
1160 * and the function returns.
1161 *
1162 * Returns number of bytes read.
1163 *
1164 * See kernel/trace/trace.h for 'struct trace_parser' details.
1165 */
1166 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1167 size_t cnt, loff_t *ppos)
1168 {
1169 char ch;
1170 size_t read = 0;
1171 ssize_t ret;
1172
1173 if (!*ppos)
1174 trace_parser_clear(parser);
1175
1176 ret = get_user(ch, ubuf++);
1177 if (ret)
1178 goto out;
1179
1180 read++;
1181 cnt--;
1182
1183 /*
1184 * If the parser is not finished with the last write,
1185 * continue reading the user input without skipping spaces.
1186 */
1187 if (!parser->cont) {
1188 /* skip white space */
1189 while (cnt && isspace(ch)) {
1190 ret = get_user(ch, ubuf++);
1191 if (ret)
1192 goto out;
1193 read++;
1194 cnt--;
1195 }
1196
1197 /* only spaces were written */
1198 if (isspace(ch)) {
1199 *ppos += read;
1200 ret = read;
1201 goto out;
1202 }
1203
1204 parser->idx = 0;
1205 }
1206
1207 /* read the non-space input */
1208 while (cnt && !isspace(ch)) {
1209 if (parser->idx < parser->size - 1)
1210 parser->buffer[parser->idx++] = ch;
1211 else {
1212 ret = -EINVAL;
1213 goto out;
1214 }
1215 ret = get_user(ch, ubuf++);
1216 if (ret)
1217 goto out;
1218 read++;
1219 cnt--;
1220 }
1221
1222 /* We either got finished input or we have to wait for another call. */
1223 if (isspace(ch)) {
1224 parser->buffer[parser->idx] = 0;
1225 parser->cont = false;
1226 } else if (parser->idx < parser->size - 1) {
1227 parser->cont = true;
1228 parser->buffer[parser->idx++] = ch;
1229 } else {
1230 ret = -EINVAL;
1231 goto out;
1232 }
1233
1234 *ppos += read;
1235 ret = read;
1236
1237 out:
1238 return ret;
1239 }
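/*
 * Example (illustrative): if user space writes "123 456\n", the first
 * call fills parser->buffer with "123" and returns 4 (the three digits
 * plus the space that ended the token); the next call parses "456".
 */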
1240
1241 /* TODO add a seq_buf_to_buffer() */
1242 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1243 {
1244 int len;
1245
1246 if (trace_seq_used(s) <= s->seq.readpos)
1247 return -EBUSY;
1248
1249 len = trace_seq_used(s) - s->seq.readpos;
1250 if (cnt > len)
1251 cnt = len;
1252 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1253
1254 s->seq.readpos += cnt;
1255 return cnt;
1256 }
1257
1258 unsigned long __read_mostly tracing_thresh;
1259
1260 #ifdef CONFIG_TRACER_MAX_TRACE
1261 /*
1262 * Copy the new maximum trace into the separate maximum-trace
1263 * structure. (this way the maximum trace is permanently saved,
1264 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1265 */
1266 static void
1267 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1268 {
1269 struct trace_buffer *trace_buf = &tr->trace_buffer;
1270 struct trace_buffer *max_buf = &tr->max_buffer;
1271 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1272 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1273
1274 max_buf->cpu = cpu;
1275 max_buf->time_start = data->preempt_timestamp;
1276
1277 max_data->saved_latency = tr->max_latency;
1278 max_data->critical_start = data->critical_start;
1279 max_data->critical_end = data->critical_end;
1280
1281 memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1282 max_data->pid = tsk->pid;
1283 /*
1284 * If tsk == current, then use current_uid(), as that does not use
1285 * RCU. The irq tracer can be called out of RCU scope.
1286 */
1287 if (tsk == current)
1288 max_data->uid = current_uid();
1289 else
1290 max_data->uid = task_uid(tsk);
1291
1292 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1293 max_data->policy = tsk->policy;
1294 max_data->rt_priority = tsk->rt_priority;
1295
1296 /* record this task's comm */
1297 tracing_record_cmdline(tsk);
1298 }
1299
1300 /**
1301 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1302 * @tr: tracer
1303 * @tsk: the task with the latency
1304 * @cpu: The cpu that initiated the trace.
1305 *
1306 * Flip the buffers between the @tr and the max_tr and record information
1307 * about which task was the cause of this latency.
1308 */
1309 void
1310 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1311 {
1312 struct ring_buffer *buf;
1313
1314 if (tr->stop_count)
1315 return;
1316
1317 WARN_ON_ONCE(!irqs_disabled());
1318
1319 if (!tr->allocated_snapshot) {
1320 /* Only the nop tracer should hit this when disabling */
1321 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1322 return;
1323 }
1324
1325 arch_spin_lock(&tr->max_lock);
1326
1327 buf = tr->trace_buffer.buffer;
1328 tr->trace_buffer.buffer = tr->max_buffer.buffer;
1329 tr->max_buffer.buffer = buf;
1330
1331 __update_max_tr(tr, tsk, cpu);
1332 arch_spin_unlock(&tr->max_lock);
1333 }
1334
1335 /**
1336 * update_max_tr_single - only copy one trace over, and reset the rest
1337 * @tr: tracer
1338 * @tsk: task with the latency
1339 * @cpu: the cpu of the buffer to copy.
1340 *
1341 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1342 */
1343 void
1344 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1345 {
1346 int ret;
1347
1348 if (tr->stop_count)
1349 return;
1350
1351 WARN_ON_ONCE(!irqs_disabled());
1352 if (!tr->allocated_snapshot) {
1353 /* Only the nop tracer should hit this when disabling */
1354 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1355 return;
1356 }
1357
1358 arch_spin_lock(&tr->max_lock);
1359
1360 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1361
1362 if (ret == -EBUSY) {
1363 /*
1364 * We failed to swap the buffer due to a commit taking
1365 * place on this CPU. We fail to record, but we reset
1366 * the max trace buffer (no one writes directly to it)
1367 * and flag that it failed.
1368 */
1369 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1370 "Failed to swap buffers due to commit in progress\n");
1371 }
1372
1373 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1374
1375 __update_max_tr(tr, tsk, cpu);
1376 arch_spin_unlock(&tr->max_lock);
1377 }
1378 #endif /* CONFIG_TRACER_MAX_TRACE */
1379
1380 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1381 {
1382 /* Iterators are static, they should be filled or empty */
1383 if (trace_buffer_iter(iter, iter->cpu_file))
1384 return 0;
1385
1386 return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1387 full);
1388 }
1389
1390 #ifdef CONFIG_FTRACE_STARTUP_TEST
1391 static int run_tracer_selftest(struct tracer *type)
1392 {
1393 struct trace_array *tr = &global_trace;
1394 struct tracer *saved_tracer = tr->current_trace;
1395 int ret;
1396
1397 if (!type->selftest || tracing_selftest_disabled)
1398 return 0;
1399
1400 /*
1401 * Run a selftest on this tracer.
1402 * Here we reset the trace buffer, and set the current
1403 * tracer to be this tracer. The tracer can then run some
1404 * internal tracing to verify that everything is in order.
1405 * If we fail, we do not register this tracer.
1406 */
1407 tracing_reset_online_cpus(&tr->trace_buffer);
1408
1409 tr->current_trace = type;
1410
1411 #ifdef CONFIG_TRACER_MAX_TRACE
1412 if (type->use_max_tr) {
1413 /* If we expanded the buffers, make sure the max is expanded too */
1414 if (ring_buffer_expanded)
1415 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1416 RING_BUFFER_ALL_CPUS);
1417 tr->allocated_snapshot = true;
1418 }
1419 #endif
1420
1421 /* the test is responsible for initializing and enabling */
1422 pr_info("Testing tracer %s: ", type->name);
1423 ret = type->selftest(type, tr);
1424 /* the test is responsible for resetting too */
1425 tr->current_trace = saved_tracer;
1426 if (ret) {
1427 printk(KERN_CONT "FAILED!\n");
1428 /* Add the warning after printing 'FAILED' */
1429 WARN_ON(1);
1430 return -1;
1431 }
1432 /* Only reset on passing, to avoid touching corrupted buffers */
1433 tracing_reset_online_cpus(&tr->trace_buffer);
1434
1435 #ifdef CONFIG_TRACER_MAX_TRACE
1436 if (type->use_max_tr) {
1437 tr->allocated_snapshot = false;
1438
1439 /* Shrink the max buffer again */
1440 if (ring_buffer_expanded)
1441 ring_buffer_resize(tr->max_buffer.buffer, 1,
1442 RING_BUFFER_ALL_CPUS);
1443 }
1444 #endif
1445
1446 printk(KERN_CONT "PASSED\n");
1447 return 0;
1448 }
1449 #else
1450 static inline int run_tracer_selftest(struct tracer *type)
1451 {
1452 return 0;
1453 }
1454 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1455
1456 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1457
1458 static void __init apply_trace_boot_options(void);
1459
1460 /**
1461 * register_tracer - register a tracer with the ftrace system.
1462 * @type - the plugin for the tracer
1463 *
1464 * Register a new plugin tracer.
1465 */
1466 int __init register_tracer(struct tracer *type)
1467 {
1468 struct tracer *t;
1469 int ret = 0;
1470
1471 if (!type->name) {
1472 pr_info("Tracer must have a name\n");
1473 return -1;
1474 }
1475
1476 if (strlen(type->name) >= MAX_TRACER_SIZE) {
1477 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1478 return -1;
1479 }
1480
1481 mutex_lock(&trace_types_lock);
1482
1483 tracing_selftest_running = true;
1484
1485 for (t = trace_types; t; t = t->next) {
1486 if (strcmp(type->name, t->name) == 0) {
1487 /* already found */
1488 pr_info("Tracer %s already registered\n",
1489 type->name);
1490 ret = -1;
1491 goto out;
1492 }
1493 }
1494
1495 if (!type->set_flag)
1496 type->set_flag = &dummy_set_flag;
1497 if (!type->flags) {
1498 /* allocate a dummy tracer_flags */
1499 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1500 if (!type->flags) {
1501 ret = -ENOMEM;
1502 goto out;
1503 }
1504 type->flags->val = 0;
1505 type->flags->opts = dummy_tracer_opt;
1506 } else
1507 if (!type->flags->opts)
1508 type->flags->opts = dummy_tracer_opt;
1509
1510 /* store the tracer for __set_tracer_option */
1511 type->flags->trace = type;
1512
1513 ret = run_tracer_selftest(type);
1514 if (ret < 0)
1515 goto out;
1516
1517 type->next = trace_types;
1518 trace_types = type;
1519 add_tracer_options(&global_trace, type);
1520
1521 out:
1522 tracing_selftest_running = false;
1523 mutex_unlock(&trace_types_lock);
1524
1525 if (ret || !default_bootup_tracer)
1526 goto out_unlock;
1527
1528 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1529 goto out_unlock;
1530
1531 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1532 /* Do we want this tracer to start on bootup? */
1533 tracing_set_tracer(&global_trace, type->name);
1534 default_bootup_tracer = NULL;
1535
1536 apply_trace_boot_options();
1537
1538 /* disable other selftests, since this will break them. */
1539 tracing_selftest_disabled = true;
1540 #ifdef CONFIG_FTRACE_STARTUP_TEST
1541 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1542 type->name);
1543 #endif
1544
1545 out_unlock:
1546 return ret;
1547 }
1548
1549 void tracing_reset(struct trace_buffer *buf, int cpu)
1550 {
1551 struct ring_buffer *buffer = buf->buffer;
1552
1553 if (!buffer)
1554 return;
1555
1556 ring_buffer_record_disable(buffer);
1557
1558 /* Make sure all commits have finished */
1559 synchronize_sched();
1560 ring_buffer_reset_cpu(buffer, cpu);
1561
1562 ring_buffer_record_enable(buffer);
1563 }
1564
1565 void tracing_reset_online_cpus(struct trace_buffer *buf)
1566 {
1567 struct ring_buffer *buffer = buf->buffer;
1568 int cpu;
1569
1570 if (!buffer)
1571 return;
1572
1573 ring_buffer_record_disable(buffer);
1574
1575 /* Make sure all commits have finished */
1576 synchronize_sched();
1577
1578 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1579
1580 for_each_online_cpu(cpu)
1581 ring_buffer_reset_cpu(buffer, cpu);
1582
1583 ring_buffer_record_enable(buffer);
1584 }
1585
1586 /* Must have trace_types_lock held */
1587 void tracing_reset_all_online_cpus(void)
1588 {
1589 struct trace_array *tr;
1590
1591 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1592 tracing_reset_online_cpus(&tr->trace_buffer);
1593 #ifdef CONFIG_TRACER_MAX_TRACE
1594 tracing_reset_online_cpus(&tr->max_buffer);
1595 #endif
1596 }
1597 }
1598
1599 #define SAVED_CMDLINES_DEFAULT 128
1600 #define NO_CMDLINE_MAP UINT_MAX
1601 static unsigned saved_tgids[SAVED_CMDLINES_DEFAULT];
1602 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
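/*
 * savedcmd remembers the comm of recently traced tasks:
 * map_pid_to_cmdline maps a pid to a slot index, map_cmdline_to_pid maps
 * a slot back to its pid, and saved_cmdlines holds cmdline_num comms of
 * TASK_COMM_LEN bytes each, with slots reused in round-robin order.
 */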
1603 struct saved_cmdlines_buffer {
1604 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1605 unsigned *map_cmdline_to_pid;
1606 unsigned cmdline_num;
1607 int cmdline_idx;
1608 char *saved_cmdlines;
1609 };
1610 static struct saved_cmdlines_buffer *savedcmd;
1611
1612 /* temporarily disable recording */
1613 static atomic_t trace_record_cmdline_disabled __read_mostly;
1614
1615 static inline char *get_saved_cmdlines(int idx)
1616 {
1617 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1618 }
1619
1620 static inline void set_cmdline(int idx, const char *cmdline)
1621 {
1622 memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1623 }
1624
1625 static int allocate_cmdlines_buffer(unsigned int val,
1626 struct saved_cmdlines_buffer *s)
1627 {
1628 s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1629 GFP_KERNEL);
1630 if (!s->map_cmdline_to_pid)
1631 return -ENOMEM;
1632
1633 s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1634 if (!s->saved_cmdlines) {
1635 kfree(s->map_cmdline_to_pid);
1636 return -ENOMEM;
1637 }
1638
1639 s->cmdline_idx = 0;
1640 s->cmdline_num = val;
1641 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1642 sizeof(s->map_pid_to_cmdline));
1643 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1644 val * sizeof(*s->map_cmdline_to_pid));
1645
1646 return 0;
1647 }
1648
1649 static int trace_create_savedcmd(void)
1650 {
1651 int ret;
1652
1653 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1654 if (!savedcmd)
1655 return -ENOMEM;
1656
1657 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1658 if (ret < 0) {
1659 kfree(savedcmd);
1660 savedcmd = NULL;
1661 return -ENOMEM;
1662 }
1663
1664 return 0;
1665 }
1666
1667 int is_tracing_stopped(void)
1668 {
1669 return global_trace.stop_count;
1670 }
1671
1672 /**
1673 * tracing_start - quick start of the tracer
1674 *
1675 * If tracing is enabled but was stopped by tracing_stop,
1676 * this will start the tracer back up.
1677 */
1678 void tracing_start(void)
1679 {
1680 struct ring_buffer *buffer;
1681 unsigned long flags;
1682
1683 if (tracing_disabled)
1684 return;
1685
1686 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1687 if (--global_trace.stop_count) {
1688 if (global_trace.stop_count < 0) {
1689 /* Someone screwed up their debugging */
1690 WARN_ON_ONCE(1);
1691 global_trace.stop_count = 0;
1692 }
1693 goto out;
1694 }
1695
1696 /* Prevent the buffers from switching */
1697 arch_spin_lock(&global_trace.max_lock);
1698
1699 buffer = global_trace.trace_buffer.buffer;
1700 if (buffer)
1701 ring_buffer_record_enable(buffer);
1702
1703 #ifdef CONFIG_TRACER_MAX_TRACE
1704 buffer = global_trace.max_buffer.buffer;
1705 if (buffer)
1706 ring_buffer_record_enable(buffer);
1707 #endif
1708
1709 arch_spin_unlock(&global_trace.max_lock);
1710
1711 out:
1712 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1713 }
1714
1715 static void tracing_start_tr(struct trace_array *tr)
1716 {
1717 struct ring_buffer *buffer;
1718 unsigned long flags;
1719
1720 if (tracing_disabled)
1721 return;
1722
1723 /* If global, we need to also start the max tracer */
1724 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1725 return tracing_start();
1726
1727 raw_spin_lock_irqsave(&tr->start_lock, flags);
1728
1729 if (--tr->stop_count) {
1730 if (tr->stop_count < 0) {
1731 /* Someone screwed up their debugging */
1732 WARN_ON_ONCE(1);
1733 tr->stop_count = 0;
1734 }
1735 goto out;
1736 }
1737
1738 buffer = tr->trace_buffer.buffer;
1739 if (buffer)
1740 ring_buffer_record_enable(buffer);
1741
1742 out:
1743 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1744 }
1745
1746 /**
1747 * tracing_stop - quick stop of the tracer
1748 *
1749 * Light weight way to stop tracing. Use in conjunction with
1750 * tracing_start.
1751 */
1752 void tracing_stop(void)
1753 {
1754 struct ring_buffer *buffer;
1755 unsigned long flags;
1756
1757 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1758 if (global_trace.stop_count++)
1759 goto out;
1760
1761 /* Prevent the buffers from switching */
1762 arch_spin_lock(&global_trace.max_lock);
1763
1764 buffer = global_trace.trace_buffer.buffer;
1765 if (buffer)
1766 ring_buffer_record_disable(buffer);
1767
1768 #ifdef CONFIG_TRACER_MAX_TRACE
1769 buffer = global_trace.max_buffer.buffer;
1770 if (buffer)
1771 ring_buffer_record_disable(buffer);
1772 #endif
1773
1774 arch_spin_unlock(&global_trace.max_lock);
1775
1776 out:
1777 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1778 }
1779
1780 static void tracing_stop_tr(struct trace_array *tr)
1781 {
1782 struct ring_buffer *buffer;
1783 unsigned long flags;
1784
1785 /* If global, we need to also stop the max tracer */
1786 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1787 return tracing_stop();
1788
1789 raw_spin_lock_irqsave(&tr->start_lock, flags);
1790 if (tr->stop_count++)
1791 goto out;
1792
1793 buffer = tr->trace_buffer.buffer;
1794 if (buffer)
1795 ring_buffer_record_disable(buffer);
1796
1797 out:
1798 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1799 }
1800
1801 void trace_stop_cmdline_recording(void);
1802
1803 static int trace_save_cmdline(struct task_struct *tsk)
1804 {
1805 unsigned pid, idx;
1806
1807 if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1808 return 0;
1809
1810 /*
1811 * It's not the end of the world if we don't get
1812 * the lock, but we also don't want to spin
1813 * nor do we want to disable interrupts,
1814 * so if we miss here, then better luck next time.
1815 */
1816 if (!arch_spin_trylock(&trace_cmdline_lock))
1817 return 0;
1818
1819 idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1820 if (idx == NO_CMDLINE_MAP) {
1821 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1822
1823 /*
1824 * Check whether the cmdline buffer at idx has a pid
1825 * mapped. We are going to overwrite that entry so we
1826 * need to clear the map_pid_to_cmdline. Otherwise we
1827 * would read the new comm for the old pid.
1828 */
1829 pid = savedcmd->map_cmdline_to_pid[idx];
1830 if (pid != NO_CMDLINE_MAP)
1831 savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1832
1833 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1834 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1835
1836 savedcmd->cmdline_idx = idx;
1837 }
1838
1839 set_cmdline(idx, tsk->comm);
1840 saved_tgids[idx] = tsk->tgid;
1841 arch_spin_unlock(&trace_cmdline_lock);
1842
1843 return 1;
1844 }
1845
1846 static void __trace_find_cmdline(int pid, char comm[])
1847 {
1848 unsigned map;
1849
1850 if (!pid) {
1851 strcpy(comm, "<idle>");
1852 return;
1853 }
1854
1855 if (WARN_ON_ONCE(pid < 0)) {
1856 strcpy(comm, "<XXX>");
1857 return;
1858 }
1859
1860 if (pid > PID_MAX_DEFAULT) {
1861 strcpy(comm, "<...>");
1862 return;
1863 }
1864
1865 map = savedcmd->map_pid_to_cmdline[pid];
1866 if (map != NO_CMDLINE_MAP)
1867 strcpy(comm, get_saved_cmdlines(map));
1868 else
1869 strcpy(comm, "<...>");
1870 }
1871
1872 void trace_find_cmdline(int pid, char comm[])
1873 {
1874 preempt_disable();
1875 arch_spin_lock(&trace_cmdline_lock);
1876
1877 __trace_find_cmdline(pid, comm);
1878
1879 arch_spin_unlock(&trace_cmdline_lock);
1880 preempt_enable();
1881 }
1882
1883 int trace_find_tgid(int pid)
1884 {
1885 unsigned map;
1886 int tgid;
1887
1888 preempt_disable();
1889 arch_spin_lock(&trace_cmdline_lock);
1890 map = savedcmd->map_pid_to_cmdline[pid];
1891 if (map != NO_CMDLINE_MAP)
1892 tgid = saved_tgids[map];
1893 else
1894 tgid = -1;
1895
1896 arch_spin_unlock(&trace_cmdline_lock);
1897 preempt_enable();
1898
1899 return tgid;
1900 }
1901
1902 void tracing_record_cmdline(struct task_struct *tsk)
1903 {
1904 if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1905 return;
1906
1907 if (!__this_cpu_read(trace_cmdline_save))
1908 return;
1909
1910 if (trace_save_cmdline(tsk))
1911 __this_cpu_write(trace_cmdline_save, false);
1912 }
1913
1914 void
1915 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1916 int pc)
1917 {
1918 struct task_struct *tsk = current;
1919
1920 entry->preempt_count = pc & 0xff;
1921 entry->pid = (tsk) ? tsk->pid : 0;
1922 entry->flags =
1923 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1924 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1925 #else
1926 TRACE_FLAG_IRQS_NOSUPPORT |
1927 #endif
1928 ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
1929 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1930 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
1931 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1932 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1933 }
1934 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
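
/*
 * Note on tracing_generic_entry_update() above: @pc is a preempt_count()
 * snapshot, which is why the NMI/hardirq/softirq context bits can be tested
 * against NMI_MASK, HARDIRQ_MASK and SOFTIRQ_OFFSET directly, and why only
 * the low byte is kept as the recorded preempt depth.
 */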
1935
1936 static __always_inline void
1937 trace_event_setup(struct ring_buffer_event *event,
1938 int type, unsigned long flags, int pc)
1939 {
1940 struct trace_entry *ent = ring_buffer_event_data(event);
1941
1942 tracing_generic_entry_update(ent, flags, pc);
1943 ent->type = type;
1944 }
1945
1946 struct ring_buffer_event *
1947 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1948 int type,
1949 unsigned long len,
1950 unsigned long flags, int pc)
1951 {
1952 struct ring_buffer_event *event;
1953
1954 event = ring_buffer_lock_reserve(buffer, len);
1955 if (event != NULL)
1956 trace_event_setup(event, type, flags, pc);
1957
1958 return event;
1959 }
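
/*
 * For illustration, the usual pattern built on the helper above is
 * reserve -> fill in the entry -> commit, much like trace_function()
 * further down (real callers also run the discard filter first):
 *
 *	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
 *					  flags, pc);
 *	if (!event)
 *		return;
 *	entry = ring_buffer_event_data(event);
 *	entry->ip = ip;
 *	entry->parent_ip = parent_ip;
 *	__buffer_unlock_commit(buffer, event);
 */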
1960
1961 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1962 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1963 static int trace_buffered_event_ref;
1964
1965 /**
1966 * trace_buffered_event_enable - enable buffering events
1967 *
1968 * When events are being filtered, it is quicker to use a temporary
1969 * buffer to write the event data into if there's a likely chance
1970  * that it will not be committed. Discarding an event from the ring
1971  * buffer is not as fast as committing one, and is much slower than
1972  * copying the data over at commit time.
1973 *
1974 * When an event is to be filtered, allocate per cpu buffers to
1975 * write the event data into, and if the event is filtered and discarded
1976 * it is simply dropped, otherwise, the entire data is to be committed
1977 * in one shot.
1978 */
1979 void trace_buffered_event_enable(void)
1980 {
1981 struct ring_buffer_event *event;
1982 struct page *page;
1983 int cpu;
1984
1985 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1986
1987 if (trace_buffered_event_ref++)
1988 return;
1989
1990 for_each_tracing_cpu(cpu) {
1991 page = alloc_pages_node(cpu_to_node(cpu),
1992 GFP_KERNEL | __GFP_NORETRY, 0);
1993 if (!page)
1994 goto failed;
1995
1996 event = page_address(page);
1997 memset(event, 0, sizeof(*event));
1998
1999 per_cpu(trace_buffered_event, cpu) = event;
2000
2001 preempt_disable();
2002 if (cpu == smp_processor_id() &&
2003 this_cpu_read(trace_buffered_event) !=
2004 per_cpu(trace_buffered_event, cpu))
2005 WARN_ON_ONCE(1);
2006 preempt_enable();
2007 }
2008
2009 return;
2010 failed:
2011 trace_buffered_event_disable();
2012 }
2013
2014 static void enable_trace_buffered_event(void *data)
2015 {
2016 /* Probably not needed, but do it anyway */
2017 smp_rmb();
2018 this_cpu_dec(trace_buffered_event_cnt);
2019 }
2020
2021 static void disable_trace_buffered_event(void *data)
2022 {
2023 this_cpu_inc(trace_buffered_event_cnt);
2024 }
2025
2026 /**
2027 * trace_buffered_event_disable - disable buffering events
2028 *
2029 * When a filter is removed, it is faster to not use the buffered
2030 * events, and to commit directly into the ring buffer. Free up
2031 * the temp buffers when there are no more users. This requires
2032 * special synchronization with current events.
2033 */
2034 void trace_buffered_event_disable(void)
2035 {
2036 int cpu;
2037
2038 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2039
2040 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2041 return;
2042
2043 if (--trace_buffered_event_ref)
2044 return;
2045
2046 preempt_disable();
2047 /* For each CPU, set the buffer as used. */
2048 smp_call_function_many(tracing_buffer_mask,
2049 disable_trace_buffered_event, NULL, 1);
2050 preempt_enable();
2051
2052 /* Wait for all current users to finish */
2053 synchronize_sched();
2054
2055 for_each_tracing_cpu(cpu) {
2056 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2057 per_cpu(trace_buffered_event, cpu) = NULL;
2058 }
2059 /*
2060 * Make sure trace_buffered_event is NULL before clearing
2061 * trace_buffered_event_cnt.
2062 */
2063 smp_wmb();
2064
2065 preempt_disable();
2066 /* Do the work on each cpu */
2067 smp_call_function_many(tracing_buffer_mask,
2068 enable_trace_buffered_event, NULL, 1);
2069 preempt_enable();
2070 }
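
/*
 * Note on the ordering above: each CPU is first forced to bump its
 * trace_buffered_event_cnt so that new events fall back to the ring buffer,
 * then synchronize_sched() waits out anyone still using the old per-cpu
 * page, and only after the pages are freed and the pointers cleared (plus
 * the write barrier) are the counters dropped back down.
 */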
2071
2072 void
2073 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
2074 {
2075 __this_cpu_write(trace_cmdline_save, true);
2076
2077 /* If this is the temp buffer, we need to commit fully */
2078 if (this_cpu_read(trace_buffered_event) == event) {
2079 /* Length is in event->array[0] */
2080 ring_buffer_write(buffer, event->array[0], &event->array[1]);
2081 /* Release the temp buffer */
2082 this_cpu_dec(trace_buffered_event_cnt);
2083 } else
2084 ring_buffer_unlock_commit(buffer, event);
2085 }
2086
2087 static struct ring_buffer *temp_buffer;
2088
2089 struct ring_buffer_event *
2090 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2091 struct trace_event_file *trace_file,
2092 int type, unsigned long len,
2093 unsigned long flags, int pc)
2094 {
2095 struct ring_buffer_event *entry;
2096 int val;
2097
2098 *current_rb = trace_file->tr->trace_buffer.buffer;
2099
2100 if ((trace_file->flags &
2101 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2102 (entry = this_cpu_read(trace_buffered_event))) {
2103 /* Try to use the per cpu buffer first */
2104 val = this_cpu_inc_return(trace_buffered_event_cnt);
2105 if (val == 1) {
2106 trace_event_setup(entry, type, flags, pc);
2107 entry->array[0] = len;
2108 return entry;
2109 }
2110 this_cpu_dec(trace_buffered_event_cnt);
2111 }
2112
2113 entry = trace_buffer_lock_reserve(*current_rb,
2114 type, len, flags, pc);
2115 /*
2116 * If tracing is off, but we have triggers enabled
2117 * we still need to look at the event data. Use the temp_buffer
2118 	 * to store the trace event for the trigger to use. It's recursion
2119 	 * safe and will not be recorded anywhere.
2120 */
2121 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2122 *current_rb = temp_buffer;
2123 entry = trace_buffer_lock_reserve(*current_rb,
2124 type, len, flags, pc);
2125 }
2126 return entry;
2127 }
2128 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
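
/*
 * For illustration, a minimal sketch of a caller of the reserve helper
 * above (the real users are the generated trace event probes; "entry"
 * stands in for the event-specific structure):
 *
 *	event = trace_event_buffer_lock_reserve(&buffer, trace_file, type,
 *						sizeof(*entry), flags, pc);
 *	if (!event)
 *		return;
 *	entry = ring_buffer_event_data(event);
 *	... fill in the event fields ...
 *	__buffer_unlock_commit(buffer, event);
 *
 * __buffer_unlock_commit() copes with both outcomes: a normal ring buffer
 * reservation and the per-cpu trace_buffered_event fast path.
 */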
2129
2130 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2131 struct ring_buffer *buffer,
2132 struct ring_buffer_event *event,
2133 unsigned long flags, int pc,
2134 struct pt_regs *regs)
2135 {
2136 __buffer_unlock_commit(buffer, event);
2137
2138 /*
2139 * If regs is not set, then skip the following callers:
2140 * trace_buffer_unlock_commit_regs
2141 * event_trigger_unlock_commit
2142 * trace_event_buffer_commit
2143 * trace_event_raw_event_sched_switch
2144 * Note, we can still get here via blktrace, wakeup tracer
2145 * and mmiotrace, but that's ok if they lose a function or
2146 	 * two. They are not that meaningful.
2147 */
2148 ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2149 ftrace_trace_userstack(buffer, flags, pc);
2150 }
2151
2152 void
2153 trace_function(struct trace_array *tr,
2154 unsigned long ip, unsigned long parent_ip, unsigned long flags,
2155 int pc)
2156 {
2157 struct trace_event_call *call = &event_function;
2158 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2159 struct ring_buffer_event *event;
2160 struct ftrace_entry *entry;
2161
2162 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2163 flags, pc);
2164 if (!event)
2165 return;
2166 entry = ring_buffer_event_data(event);
2167 entry->ip = ip;
2168 entry->parent_ip = parent_ip;
2169
2170 if (!call_filter_check_discard(call, entry, buffer, event))
2171 __buffer_unlock_commit(buffer, event);
2172 }
2173
2174 #ifdef CONFIG_STACKTRACE
2175
2176 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2177 struct ftrace_stack {
2178 unsigned long calls[FTRACE_STACK_MAX_ENTRIES];
2179 };
2180
2181 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2182 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2183
2184 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2185 unsigned long flags,
2186 int skip, int pc, struct pt_regs *regs)
2187 {
2188 struct trace_event_call *call = &event_kernel_stack;
2189 struct ring_buffer_event *event;
2190 struct stack_entry *entry;
2191 struct stack_trace trace;
2192 int use_stack;
2193 int size = FTRACE_STACK_ENTRIES;
2194
2195 trace.nr_entries = 0;
2196 trace.skip = skip;
2197
2198 /*
2199 * Add two, for this function and the call to save_stack_trace()
2200 * If regs is set, then these functions will not be in the way.
2201 */
2202 if (!regs)
2203 trace.skip += 2;
2204
2205 /*
2206 * Since events can happen in NMIs there's no safe way to
2207 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2208 	 * or NMI comes in, it will just have to write the stack directly
2209 	 * into the event, capped at FTRACE_STACK_ENTRIES entries.
2210 */
2211 preempt_disable_notrace();
2212
2213 use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2214 /*
2215 * We don't need any atomic variables, just a barrier.
2216 * If an interrupt comes in, we don't care, because it would
2217 * have exited and put the counter back to what we want.
2218 * We just need a barrier to keep gcc from moving things
2219 * around.
2220 */
2221 barrier();
2222 if (use_stack == 1) {
2223 trace.entries = this_cpu_ptr(ftrace_stack.calls);
2224 trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
2225
2226 if (regs)
2227 save_stack_trace_regs(regs, &trace);
2228 else
2229 save_stack_trace(&trace);
2230
2231 if (trace.nr_entries > size)
2232 size = trace.nr_entries;
2233 } else
2234 /* From now on, use_stack is a boolean */
2235 use_stack = 0;
2236
2237 size *= sizeof(unsigned long);
2238
2239 event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
2240 sizeof(*entry) + size, flags, pc);
2241 if (!event)
2242 goto out;
2243 entry = ring_buffer_event_data(event);
2244
2245 memset(&entry->caller, 0, size);
2246
2247 if (use_stack)
2248 memcpy(&entry->caller, trace.entries,
2249 trace.nr_entries * sizeof(unsigned long));
2250 else {
2251 trace.max_entries = FTRACE_STACK_ENTRIES;
2252 trace.entries = entry->caller;
2253 if (regs)
2254 save_stack_trace_regs(regs, &trace);
2255 else
2256 save_stack_trace(&trace);
2257 }
2258
2259 entry->size = trace.nr_entries;
2260
2261 if (!call_filter_check_discard(call, entry, buffer, event))
2262 __buffer_unlock_commit(buffer, event);
2263
2264 out:
2265 /* Again, don't let gcc optimize things here */
2266 barrier();
2267 __this_cpu_dec(ftrace_stack_reserve);
2268 preempt_enable_notrace();
2269
2270 }
2271
2272 static inline void ftrace_trace_stack(struct trace_array *tr,
2273 struct ring_buffer *buffer,
2274 unsigned long flags,
2275 int skip, int pc, struct pt_regs *regs)
2276 {
2277 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2278 return;
2279
2280 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2281 }
2282
2283 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2284 int pc)
2285 {
2286 __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2287 }
2288
2289 /**
2290 * trace_dump_stack - record a stack back trace in the trace buffer
2291 * @skip: Number of functions to skip (helper handlers)
2292 */
2293 void trace_dump_stack(int skip)
2294 {
2295 unsigned long flags;
2296
2297 if (tracing_disabled || tracing_selftest_running)
2298 return;
2299
2300 local_save_flags(flags);
2301
2302 /*
2303 	 * Skip 3 more; that seems to get us to the caller of
2304 * this function.
2305 */
2306 skip += 3;
2307 __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2308 flags, skip, preempt_count(), NULL);
2309 }
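
/*
 * For illustration, a debugging call site elsewhere in the kernel can drop
 * a backtrace of its caller into the trace buffer with
 *
 *	trace_dump_stack(0);
 *
 * and pass a larger @skip when the call is buried inside helper functions.
 */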
2310
2311 static DEFINE_PER_CPU(int, user_stack_count);
2312
2313 void
2314 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2315 {
2316 struct trace_event_call *call = &event_user_stack;
2317 struct ring_buffer_event *event;
2318 struct userstack_entry *entry;
2319 struct stack_trace trace;
2320
2321 if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2322 return;
2323
2324 /*
2325 	 * NMIs can not handle page faults, even with fixups.
2326 	 * Saving the user stack can (and often does) fault.
2327 */
2328 if (unlikely(in_nmi()))
2329 return;
2330
2331 /*
2332 * prevent recursion, since the user stack tracing may
2333 * trigger other kernel events.
2334 */
2335 preempt_disable();
2336 if (__this_cpu_read(user_stack_count))
2337 goto out;
2338
2339 __this_cpu_inc(user_stack_count);
2340
2341 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2342 sizeof(*entry), flags, pc);
2343 if (!event)
2344 goto out_drop_count;
2345 entry = ring_buffer_event_data(event);
2346
2347 entry->tgid = current->tgid;
2348 memset(&entry->caller, 0, sizeof(entry->caller));
2349
2350 trace.nr_entries = 0;
2351 trace.max_entries = FTRACE_STACK_ENTRIES;
2352 trace.skip = 0;
2353 trace.entries = entry->caller;
2354
2355 save_stack_trace_user(&trace);
2356 if (!call_filter_check_discard(call, entry, buffer, event))
2357 __buffer_unlock_commit(buffer, event);
2358
2359 out_drop_count:
2360 __this_cpu_dec(user_stack_count);
2361 out:
2362 preempt_enable();
2363 }
2364
2365 #ifdef UNUSED
2366 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2367 {
2368 ftrace_trace_userstack(tr, flags, preempt_count());
2369 }
2370 #endif /* UNUSED */
2371
2372 #endif /* CONFIG_STACKTRACE */
2373
2374 /* created for use with alloc_percpu */
2375 struct trace_buffer_struct {
2376 int nesting;
2377 char buffer[4][TRACE_BUF_SIZE];
2378 };
2379
2380 static struct trace_buffer_struct *trace_percpu_buffer;
2381
2382 /*
2383  * This allows for lockless recording. If we're nested too deeply, then
2384 * this returns NULL.
2385 */
2386 static char *get_trace_buf(void)
2387 {
2388 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2389
2390 if (!buffer || buffer->nesting >= 4)
2391 return NULL;
2392
2393 buffer->nesting++;
2394
2395 /* Interrupts must see nesting incremented before we use the buffer */
2396 barrier();
2397 return &buffer->buffer[buffer->nesting][0];
2398 }
2399
2400 static void put_trace_buf(void)
2401 {
2402 /* Don't let the decrement of nesting leak before this */
2403 barrier();
2404 this_cpu_dec(trace_percpu_buffer->nesting);
2405 }
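
/*
 * For illustration, users of these per-cpu buffers must pair the two
 * helpers above and tolerate failure, since only four nesting levels are
 * backed by storage (intended to cover task, softirq, irq and NMI context):
 *
 *	tbuffer = get_trace_buf();
 *	if (!tbuffer)
 *		goto out_nobuffer;
 *	... format at most TRACE_BUF_SIZE bytes into tbuffer ...
 *	put_trace_buf();
 */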
2406
2407 static int alloc_percpu_trace_buffer(void)
2408 {
2409 struct trace_buffer_struct *buffers;
2410
2411 buffers = alloc_percpu(struct trace_buffer_struct);
2412 if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2413 return -ENOMEM;
2414
2415 trace_percpu_buffer = buffers;
2416 return 0;
2417 }
2418
2419 static int buffers_allocated;
2420
2421 void trace_printk_init_buffers(void)
2422 {
2423 if (buffers_allocated)
2424 return;
2425
2426 if (alloc_percpu_trace_buffer())
2427 return;
2428
2429 /* trace_printk() is for debug use only. Don't use it in production. */
2430
2431 pr_warn("\n");
2432 pr_warn("**********************************************************\n");
2433 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2434 pr_warn("** **\n");
2435 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
2436 pr_warn("** **\n");
2437 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
2438 pr_warn("** unsafe for production use. **\n");
2439 pr_warn("** **\n");
2440 pr_warn("** If you see this message and you are not debugging **\n");
2441 pr_warn("** the kernel, report this immediately to your vendor! **\n");
2442 pr_warn("** **\n");
2443 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2444 pr_warn("**********************************************************\n");
2445
2446 /* Expand the buffers to set size */
2447 tracing_update_buffers();
2448
2449 buffers_allocated = 1;
2450
2451 /*
2452 * trace_printk_init_buffers() can be called by modules.
2453 * If that happens, then we need to start cmdline recording
2454 * directly here. If the global_trace.buffer is already
2455 * allocated here, then this was called by module code.
2456 */
2457 if (global_trace.trace_buffer.buffer)
2458 tracing_start_cmdline_record();
2459 }
2460
2461 void trace_printk_start_comm(void)
2462 {
2463 /* Start tracing comms if trace printk is set */
2464 if (!buffers_allocated)
2465 return;
2466 tracing_start_cmdline_record();
2467 }
2468
2469 static void trace_printk_start_stop_comm(int enabled)
2470 {
2471 if (!buffers_allocated)
2472 return;
2473
2474 if (enabled)
2475 tracing_start_cmdline_record();
2476 else
2477 tracing_stop_cmdline_record();
2478 }
2479
2480 /**
2481  * trace_vbprintk - write a binary trace_printk() message into the tracing buffer
2482  * @ip: instruction pointer of the caller; @fmt: format string; @args: arguments for @fmt
2483 */
2484 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2485 {
2486 struct trace_event_call *call = &event_bprint;
2487 struct ring_buffer_event *event;
2488 struct ring_buffer *buffer;
2489 struct trace_array *tr = &global_trace;
2490 struct bprint_entry *entry;
2491 unsigned long flags;
2492 char *tbuffer;
2493 int len = 0, size, pc;
2494
2495 if (unlikely(tracing_selftest_running || tracing_disabled))
2496 return 0;
2497
2498 /* Don't pollute graph traces with trace_vprintk internals */
2499 pause_graph_tracing();
2500
2501 pc = preempt_count();
2502 preempt_disable_notrace();
2503
2504 tbuffer = get_trace_buf();
2505 if (!tbuffer) {
2506 len = 0;
2507 goto out_nobuffer;
2508 }
2509
2510 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2511
2512 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2513 goto out;
2514
2515 local_save_flags(flags);
2516 size = sizeof(*entry) + sizeof(u32) * len;
2517 buffer = tr->trace_buffer.buffer;
2518 event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2519 flags, pc);
2520 if (!event)
2521 goto out;
2522 entry = ring_buffer_event_data(event);
2523 entry->ip = ip;
2524 entry->fmt = fmt;
2525
2526 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2527 if (!call_filter_check_discard(call, entry, buffer, event)) {
2528 __buffer_unlock_commit(buffer, event);
2529 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2530 }
2531
2532 out:
2533 put_trace_buf();
2534
2535 out_nobuffer:
2536 preempt_enable_notrace();
2537 unpause_graph_tracing();
2538
2539 return len;
2540 }
2541 EXPORT_SYMBOL_GPL(trace_vbprintk);
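
/*
 * For illustration: trace_vbprintk() is the slow path behind the binary
 * form of trace_printk(), where the arguments are stored raw and only
 * formatted when the buffer is read. A typical (debug-only) call site
 * looks like
 *
 *	trace_printk("completed %d requests in %lu ns\n", nr, delta);
 *
 * with "nr" and "delta" standing in for whatever the caller wants logged.
 */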
2542
2543 static int
2544 __trace_array_vprintk(struct ring_buffer *buffer,
2545 unsigned long ip, const char *fmt, va_list args)
2546 {
2547 struct trace_event_call *call = &event_print;
2548 struct ring_buffer_event *event;
2549 int len = 0, size, pc;
2550 struct print_entry *entry;
2551 unsigned long flags;
2552 char *tbuffer;
2553
2554 if (tracing_disabled || tracing_selftest_running)
2555 return 0;
2556
2557 /* Don't pollute graph traces with trace_vprintk internals */
2558 pause_graph_tracing();
2559
2560 pc = preempt_count();
2561 preempt_disable_notrace();
2562
2563
2564 tbuffer = get_trace_buf();
2565 if (!tbuffer) {
2566 len = 0;
2567 goto out_nobuffer;
2568 }
2569
2570 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2571
2572 local_save_flags(flags);
2573 size = sizeof(*entry) + len + 1;
2574 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2575 flags, pc);
2576 if (!event)
2577 goto out;
2578 entry = ring_buffer_event_data(event);
2579 entry->ip = ip;
2580
2581 memcpy(&entry->buf, tbuffer, len + 1);
2582 if (!call_filter_check_discard(call, entry, buffer, event)) {
2583 __buffer_unlock_commit(buffer, event);
2584 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2585 }
2586
2587 out:
2588 put_trace_buf();
2589
2590 out_nobuffer:
2591 preempt_enable_notrace();
2592 unpause_graph_tracing();
2593
2594 return len;
2595 }
2596
2597 int trace_array_vprintk(struct trace_array *tr,
2598 unsigned long ip, const char *fmt, va_list args)
2599 {
2600 return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2601 }
2602
2603 int trace_array_printk(struct trace_array *tr,
2604 unsigned long ip, const char *fmt, ...)
2605 {
2606 int ret;
2607 va_list ap;
2608
2609 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2610 return 0;
2611
2612 va_start(ap, fmt);
2613 ret = trace_array_vprintk(tr, ip, fmt, ap);
2614 va_end(ap);
2615 return ret;
2616 }
2617
2618 int trace_array_printk_buf(struct ring_buffer *buffer,
2619 unsigned long ip, const char *fmt, ...)
2620 {
2621 int ret;
2622 va_list ap;
2623
2624 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2625 return 0;
2626
2627 va_start(ap, fmt);
2628 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2629 va_end(ap);
2630 return ret;
2631 }
2632
2633 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2634 {
2635 return trace_array_vprintk(&global_trace, ip, fmt, args);
2636 }
2637 EXPORT_SYMBOL_GPL(trace_vprintk);
2638
2639 static void trace_iterator_increment(struct trace_iterator *iter)
2640 {
2641 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2642
2643 iter->idx++;
2644 if (buf_iter)
2645 ring_buffer_read(buf_iter, NULL);
2646 }
2647
2648 static struct trace_entry *
2649 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2650 unsigned long *lost_events)
2651 {
2652 struct ring_buffer_event *event;
2653 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2654
2655 if (buf_iter)
2656 event = ring_buffer_iter_peek(buf_iter, ts);
2657 else
2658 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2659 lost_events);
2660
2661 if (event) {
2662 iter->ent_size = ring_buffer_event_length(event);
2663 return ring_buffer_event_data(event);
2664 }
2665 iter->ent_size = 0;
2666 return NULL;
2667 }
2668
2669 static struct trace_entry *
2670 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2671 unsigned long *missing_events, u64 *ent_ts)
2672 {
2673 struct ring_buffer *buffer = iter->trace_buffer->buffer;
2674 struct trace_entry *ent, *next = NULL;
2675 unsigned long lost_events = 0, next_lost = 0;
2676 int cpu_file = iter->cpu_file;
2677 u64 next_ts = 0, ts;
2678 int next_cpu = -1;
2679 int next_size = 0;
2680 int cpu;
2681
2682 /*
2683 	 * If we are in a per_cpu trace file, don't bother iterating over
2684 	 * all the cpus; peek at the requested cpu directly.
2685 */
2686 if (cpu_file > RING_BUFFER_ALL_CPUS) {
2687 if (ring_buffer_empty_cpu(buffer, cpu_file))
2688 return NULL;
2689 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2690 if (ent_cpu)
2691 *ent_cpu = cpu_file;
2692
2693 return ent;
2694 }
2695
2696 for_each_tracing_cpu(cpu) {
2697
2698 if (ring_buffer_empty_cpu(buffer, cpu))
2699 continue;
2700
2701 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2702
2703 /*
2704 * Pick the entry with the smallest timestamp:
2705 */
2706 if (ent && (!next || ts < next_ts)) {
2707 next = ent;
2708 next_cpu = cpu;
2709 next_ts = ts;
2710 next_lost = lost_events;
2711 next_size = iter->ent_size;
2712 }
2713 }
2714
2715 iter->ent_size = next_size;
2716
2717 if (ent_cpu)
2718 *ent_cpu = next_cpu;
2719
2720 if (ent_ts)
2721 *ent_ts = next_ts;
2722
2723 if (missing_events)
2724 *missing_events = next_lost;
2725
2726 return next;
2727 }
2728
2729 /* Find the next real entry, without updating the iterator itself */
2730 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2731 int *ent_cpu, u64 *ent_ts)
2732 {
2733 return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2734 }
2735
2736 /* Find the next real entry, and increment the iterator to the next entry */
2737 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2738 {
2739 iter->ent = __find_next_entry(iter, &iter->cpu,
2740 &iter->lost_events, &iter->ts);
2741
2742 if (iter->ent)
2743 trace_iterator_increment(iter);
2744
2745 return iter->ent ? iter : NULL;
2746 }
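
/*
 * For illustration, readers walk the merged per-cpu view of the buffer by
 * repeatedly taking the oldest remaining entry, roughly:
 *
 *	while (trace_find_next_entry_inc(iter))
 *		print_trace_line(iter);
 *
 * (print_trace_line() is defined further down in this file.)
 */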
2747
2748 static void trace_consume(struct trace_iterator *iter)
2749 {
2750 ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2751 &iter->lost_events);
2752 }
2753
2754 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2755 {
2756 struct trace_iterator *iter = m->private;
2757 int i = (int)*pos;
2758 void *ent;
2759
2760 WARN_ON_ONCE(iter->leftover);
2761
2762 (*pos)++;
2763
2764 /* can't go backwards */
2765 if (iter->idx > i)
2766 return NULL;
2767
2768 if (iter->idx < 0)
2769 ent = trace_find_next_entry_inc(iter);
2770 else
2771 ent = iter;
2772
2773 while (ent && iter->idx < i)
2774 ent = trace_find_next_entry_inc(iter);
2775
2776 iter->pos = *pos;
2777
2778 return ent;
2779 }
2780
2781 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2782 {
2783 struct ring_buffer_event *event;
2784 struct ring_buffer_iter *buf_iter;
2785 unsigned long entries = 0;
2786 u64 ts;
2787
2788 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2789
2790 buf_iter = trace_buffer_iter(iter, cpu);
2791 if (!buf_iter)
2792 return;
2793
2794 ring_buffer_iter_reset(buf_iter);
2795
2796 /*
2797 * We could have the case with the max latency tracers
2798 * that a reset never took place on a cpu. This is evident
2799 * by the timestamp being before the start of the buffer.
2800 */
2801 while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2802 if (ts >= iter->trace_buffer->time_start)
2803 break;
2804 entries++;
2805 ring_buffer_read(buf_iter, NULL);
2806 }
2807
2808 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2809 }
2810
2811 /*
2812  * The current tracer is copied to avoid taking a global lock
2813  * all around.
2814 */
2815 static void *s_start(struct seq_file *m, loff_t *pos)
2816 {
2817 struct trace_iterator *iter = m->private;
2818 struct trace_array *tr = iter->tr;
2819 int cpu_file = iter->cpu_file;
2820 void *p = NULL;
2821 loff_t l = 0;
2822 int cpu;
2823
2824 /*
2825 * copy the tracer to avoid using a global lock all around.
2826 * iter->trace is a copy of current_trace, the pointer to the
2827 * name may be used instead of a strcmp(), as iter->trace->name
2828 * will point to the same string as current_trace->name.
2829 */
2830 mutex_lock(&trace_types_lock);
2831 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2832 *iter->trace = *tr->current_trace;
2833 mutex_unlock(&trace_types_lock);
2834
2835 #ifdef CONFIG_TRACER_MAX_TRACE
2836 if (iter->snapshot && iter->trace->use_max_tr)
2837 return ERR_PTR(-EBUSY);
2838 #endif
2839
2840 if (!iter->snapshot)
2841 atomic_inc(&trace_record_cmdline_disabled);
2842
2843 if (*pos != iter->pos) {
2844 iter->ent = NULL;
2845 iter->cpu = 0;
2846 iter->idx = -1;
2847
2848 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2849 for_each_tracing_cpu(cpu)
2850 tracing_iter_reset(iter, cpu);
2851 } else
2852 tracing_iter_reset(iter, cpu_file);
2853
2854 iter->leftover = 0;
2855 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2856 ;
2857
2858 } else {
2859 /*
2860 * If we overflowed the seq_file before, then we want
2861 * to just reuse the trace_seq buffer again.
2862 */
2863 if (iter->leftover)
2864 p = iter;
2865 else {
2866 l = *pos - 1;
2867 p = s_next(m, p, &l);
2868 }
2869 }
2870
2871 trace_event_read_lock();
2872 trace_access_lock(cpu_file);
2873 return p;
2874 }
2875
2876 static void s_stop(struct seq_file *m, void *p)
2877 {
2878 struct trace_iterator *iter = m->private;
2879
2880 #ifdef CONFIG_TRACER_MAX_TRACE
2881 if (iter->snapshot && iter->trace->use_max_tr)
2882 return;
2883 #endif
2884
2885 if (!iter->snapshot)
2886 atomic_dec(&trace_record_cmdline_disabled);
2887
2888 trace_access_unlock(iter->cpu_file);
2889 trace_event_read_unlock();
2890 }
2891
2892 static void
2893 get_total_entries(struct trace_buffer *buf,
2894 unsigned long *total, unsigned long *entries)
2895 {
2896 unsigned long count;
2897 int cpu;
2898
2899 *total = 0;
2900 *entries = 0;
2901
2902 for_each_tracing_cpu(cpu) {
2903 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2904 /*
2905 * If this buffer has skipped entries, then we hold all
2906 * entries for the trace and we need to ignore the
2907 * ones before the time stamp.
2908 */
2909 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2910 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2911 /* total is the same as the entries */
2912 *total += count;
2913 } else
2914 *total += count +
2915 ring_buffer_overrun_cpu(buf->buffer, cpu);
2916 *entries += count;
2917 }
2918 }
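
/*
 * A small worked example of the accounting above: a cpu holding 1000
 * entries with 250 reported overruns contributes 1000 to *entries and
 * 1250 to *total, while a cpu with skipped_entries set contributes the
 * same (reduced) count to both, since its skipped entries were never part
 * of this trace.
 */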
2919
2920 static void print_lat_help_header(struct seq_file *m)
2921 {
2922 seq_puts(m, "# _------=> CPU# \n"
2923 "# / _-----=> irqs-off \n"
2924 "# | / _----=> need-resched \n"
2925 "# || / _---=> hardirq/softirq \n"
2926 "# ||| / _--=> preempt-depth \n"
2927 "# |||| / delay \n"
2928 "# cmd pid ||||| time | caller \n"
2929 "# \\ / ||||| \\ | / \n");
2930 }
2931
2932 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2933 {
2934 unsigned long total;
2935 unsigned long entries;
2936
2937 get_total_entries(buf, &total, &entries);
2938 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
2939 entries, total, num_online_cpus());
2940 seq_puts(m, "#\n");
2941 }
2942
2943 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2944 {
2945 print_event_info(buf, m);
2946 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"
2947 "# | | | | |\n");
2948 }
2949
2950 static void print_func_help_header_tgid(struct trace_buffer *buf, struct seq_file *m)
2951 {
2952 print_event_info(buf, m);
2953 seq_puts(m, "# TASK-PID TGID CPU# TIMESTAMP FUNCTION\n");
2954 seq_puts(m, "# | | | | | |\n");
2955 }
2956
2957 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2958 {
2959 print_event_info(buf, m);
2960 seq_puts(m, "# _-----=> irqs-off\n"
2961 "# / _----=> need-resched\n"
2962 "# | / _---=> hardirq/softirq\n"
2963 "# || / _--=> preempt-depth\n"
2964 "# ||| / delay\n"
2965 "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n"
2966 "# | | | |||| | |\n");
2967 }
2968
2969 static void print_func_help_header_irq_tgid(struct trace_buffer *buf, struct seq_file *m)
2970 {
2971 print_event_info(buf, m);
2972 seq_puts(m, "# _-----=> irqs-off\n");
2973 seq_puts(m, "# / _----=> need-resched\n");
2974 seq_puts(m, "# | / _---=> hardirq/softirq\n");
2975 seq_puts(m, "# || / _--=> preempt-depth\n");
2976 seq_puts(m, "# ||| / delay\n");
2977 seq_puts(m, "# TASK-PID TGID CPU# |||| TIMESTAMP FUNCTION\n");
2978 seq_puts(m, "# | | | | |||| | |\n");
2979 }
2980
2981 void
2982 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2983 {
2984 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2985 struct trace_buffer *buf = iter->trace_buffer;
2986 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2987 struct tracer *type = iter->trace;
2988 unsigned long entries;
2989 unsigned long total;
2990 const char *name = "preemption";
2991
2992 name = type->name;
2993
2994 get_total_entries(buf, &total, &entries);
2995
2996 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2997 name, UTS_RELEASE);
2998 seq_puts(m, "# -----------------------------------"
2999 "---------------------------------\n");
3000 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3001 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3002 nsecs_to_usecs(data->saved_latency),
3003 entries,
3004 total,
3005 buf->cpu,
3006 #if defined(CONFIG_PREEMPT_NONE)
3007 "server",
3008 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3009 "desktop",
3010 #elif defined(CONFIG_PREEMPT)
3011 "preempt",
3012 #else
3013 "unknown",
3014 #endif
3015 /* These are reserved for later use */
3016 0, 0, 0, 0);
3017 #ifdef CONFIG_SMP
3018 seq_printf(m, " #P:%d)\n", num_online_cpus());
3019 #else
3020 seq_puts(m, ")\n");
3021 #endif
3022 seq_puts(m, "# -----------------\n");
3023 seq_printf(m, "# | task: %.16s-%d "
3024 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3025 data->comm, data->pid,
3026 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3027 data->policy, data->rt_priority);
3028 seq_puts(m, "# -----------------\n");
3029
3030 if (data->critical_start) {
3031 seq_puts(m, "# => started at: ");
3032 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3033 trace_print_seq(m, &iter->seq);
3034 seq_puts(m, "\n# => ended at: ");
3035 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3036 trace_print_seq(m, &iter->seq);
3037 seq_puts(m, "\n#\n");
3038 }
3039
3040 seq_puts(m, "#\n");
3041 }
3042
3043 static void test_cpu_buff_start(struct trace_iterator *iter)
3044 {
3045 struct trace_seq *s = &iter->seq;
3046 struct trace_array *tr = iter->tr;
3047
3048 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3049 return;
3050
3051 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3052 return;
3053
3054 if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
3055 return;
3056
3057 if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3058 return;
3059
3060 if (iter->started)
3061 cpumask_set_cpu(iter->cpu, iter->started);
3062
3063 /* Don't print started cpu buffer for the first entry of the trace */
3064 if (iter->idx > 1)
3065 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3066 iter->cpu);
3067 }
3068
3069 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3070 {
3071 struct trace_array *tr = iter->tr;
3072 struct trace_seq *s = &iter->seq;
3073 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3074 struct trace_entry *entry;
3075 struct trace_event *event;
3076
3077 entry = iter->ent;
3078
3079 test_cpu_buff_start(iter);
3080
3081 event = ftrace_find_event(entry->type);
3082
3083 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3084 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3085 trace_print_lat_context(iter);
3086 else
3087 trace_print_context(iter);
3088 }
3089
3090 if (trace_seq_has_overflowed(s))
3091 return TRACE_TYPE_PARTIAL_LINE;
3092
3093 if (event)
3094 return event->funcs->trace(iter, sym_flags, event);
3095
3096 trace_seq_printf(s, "Unknown type %d\n", entry->type);
3097
3098 return trace_handle_return(s);
3099 }
3100
3101 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3102 {
3103 struct trace_array *tr = iter->tr;
3104 struct trace_seq *s = &iter->seq;
3105 struct trace_entry *entry;
3106 struct trace_event *event;
3107
3108 entry = iter->ent;
3109
3110 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3111 trace_seq_printf(s, "%d %d %llu ",
3112 entry->pid, iter->cpu, iter->ts);
3113
3114 if (trace_seq_has_overflowed(s))
3115 return TRACE_TYPE_PARTIAL_LINE;
3116
3117 event = ftrace_find_event(entry->type);
3118 if (event)
3119 return event->funcs->raw(iter, 0, event);
3120
3121 trace_seq_printf(s, "%d ?\n", entry->type);
3122
3123 return trace_handle_return(s);
3124 }
3125
3126 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3127 {
3128 struct trace_array *tr = iter->tr;
3129 struct trace_seq *s = &iter->seq;
3130 unsigned char newline = '\n';
3131 struct trace_entry *entry;
3132 struct trace_event *event;
3133
3134 entry = iter->ent;
3135
3136 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3137 SEQ_PUT_HEX_FIELD(s, entry->pid);
3138 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3139 SEQ_PUT_HEX_FIELD(s, iter->ts);
3140 if (trace_seq_has_overflowed(s))
3141 return TRACE_TYPE_PARTIAL_LINE;
3142 }
3143
3144 event = ftrace_find_event(entry->type);
3145 if (event) {
3146 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3147 if (ret != TRACE_TYPE_HANDLED)
3148 return ret;
3149 }
3150
3151 SEQ_PUT_FIELD(s, newline);
3152
3153 return trace_handle_return(s);
3154 }
3155
3156 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3157 {
3158 struct trace_array *tr = iter->tr;
3159 struct trace_seq *s = &iter->seq;
3160 struct trace_entry *entry;
3161 struct trace_event *event;
3162
3163 entry = iter->ent;
3164
3165 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3166 SEQ_PUT_FIELD(s, entry->pid);
3167 SEQ_PUT_FIELD(s, iter->cpu);
3168 SEQ_PUT_FIELD(s, iter->ts);
3169 if (trace_seq_has_overflowed(s))
3170 return TRACE_TYPE_PARTIAL_LINE;
3171 }
3172
3173 event = ftrace_find_event(entry->type);
3174 return event ? event->funcs->binary(iter, 0, event) :
3175 TRACE_TYPE_HANDLED;
3176 }
3177
3178 int trace_empty(struct trace_iterator *iter)
3179 {
3180 struct ring_buffer_iter *buf_iter;
3181 int cpu;
3182
3183 /* If we are looking at one CPU buffer, only check that one */
3184 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3185 cpu = iter->cpu_file;
3186 buf_iter = trace_buffer_iter(iter, cpu);
3187 if (buf_iter) {
3188 if (!ring_buffer_iter_empty(buf_iter))
3189 return 0;
3190 } else {
3191 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3192 return 0;
3193 }
3194 return 1;
3195 }
3196
3197 for_each_tracing_cpu(cpu) {
3198 buf_iter = trace_buffer_iter(iter, cpu);
3199 if (buf_iter) {
3200 if (!ring_buffer_iter_empty(buf_iter))
3201 return 0;
3202 } else {
3203 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3204 return 0;
3205 }
3206 }
3207
3208 return 1;
3209 }
3210
3211 /* Called with trace_event_read_lock() held. */
3212 enum print_line_t print_trace_line(struct trace_iterator *iter)
3213 {
3214 struct trace_array *tr = iter->tr;
3215 unsigned long trace_flags = tr->trace_flags;
3216 enum print_line_t ret;
3217
3218 if (iter->lost_events) {
3219 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3220 iter->cpu, iter->lost_events);
3221 if (trace_seq_has_overflowed(&iter->seq))
3222 return TRACE_TYPE_PARTIAL_LINE;
3223 }
3224
3225 if (iter->trace && iter->trace->print_line) {
3226 ret = iter->trace->print_line(iter);
3227 if (ret != TRACE_TYPE_UNHANDLED)
3228 return ret;
3229 }
3230
3231 if (iter->ent->type == TRACE_BPUTS &&
3232 trace_flags & TRACE_ITER_PRINTK &&
3233 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3234 return trace_print_bputs_msg_only(iter);
3235
3236 if (iter->ent->type == TRACE_BPRINT &&
3237 trace_flags & TRACE_ITER_PRINTK &&
3238 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3239 return trace_print_bprintk_msg_only(iter);
3240
3241 if (iter->ent->type == TRACE_PRINT &&
3242 trace_flags & TRACE_ITER_PRINTK &&
3243 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3244 return trace_print_printk_msg_only(iter);
3245
3246 if (trace_flags & TRACE_ITER_BIN)
3247 return print_bin_fmt(iter);
3248
3249 if (trace_flags & TRACE_ITER_HEX)
3250 return print_hex_fmt(iter);
3251
3252 if (trace_flags & TRACE_ITER_RAW)
3253 return print_raw_fmt(iter);
3254
3255 return print_trace_fmt(iter);
3256 }
3257
3258 void trace_latency_header(struct seq_file *m)
3259 {
3260 struct trace_iterator *iter = m->private;
3261 struct trace_array *tr = iter->tr;
3262
3263 /* print nothing if the buffers are empty */
3264 if (trace_empty(iter))
3265 return;
3266
3267 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3268 print_trace_header(m, iter);
3269
3270 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3271 print_lat_help_header(m);
3272 }
3273
3274 void trace_default_header(struct seq_file *m)
3275 {
3276 struct trace_iterator *iter = m->private;
3277 struct trace_array *tr = iter->tr;
3278 unsigned long trace_flags = tr->trace_flags;
3279
3280 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3281 return;
3282
3283 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3284 /* print nothing if the buffers are empty */
3285 if (trace_empty(iter))
3286 return;
3287 print_trace_header(m, iter);
3288 if (!(trace_flags & TRACE_ITER_VERBOSE))
3289 print_lat_help_header(m);
3290 } else {
3291 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3292 if (trace_flags & TRACE_ITER_IRQ_INFO)
3293 if (trace_flags & TRACE_ITER_TGID)
3294 print_func_help_header_irq_tgid(iter->trace_buffer, m);
3295 else
3296 print_func_help_header_irq(iter->trace_buffer, m);
3297 else
3298 if (trace_flags & TRACE_ITER_TGID)
3299 print_func_help_header_tgid(iter->trace_buffer, m);
3300 else
3301 print_func_help_header(iter->trace_buffer, m);
3302 }
3303 }
3304 }
3305
3306 static void test_ftrace_alive(struct seq_file *m)
3307 {
3308 if (!ftrace_is_dead())
3309 return;
3310 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3311 "# MAY BE MISSING FUNCTION EVENTS\n");
3312 }
3313
3314 #ifdef CONFIG_TRACER_MAX_TRACE
3315 static void show_snapshot_main_help(struct seq_file *m)
3316 {
3317 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3318 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3319 "# Takes a snapshot of the main buffer.\n"
3320 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3321 "# (Doesn't have to be '2' works with any number that\n"
3322 "# is not a '0' or '1')\n");
3323 }
3324
3325 static void show_snapshot_percpu_help(struct seq_file *m)
3326 {
3327 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3328 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3329 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3330 "# Takes a snapshot of the main buffer for this cpu.\n");
3331 #else
3332 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3333 "# Must use main snapshot file to allocate.\n");
3334 #endif
3335 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3336 "# (Doesn't have to be '2' works with any number that\n"
3337 "# is not a '0' or '1')\n");
3338 }
3339
3340 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3341 {
3342 if (iter->tr->allocated_snapshot)
3343 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3344 else
3345 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3346
3347 seq_puts(m, "# Snapshot commands:\n");
3348 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3349 show_snapshot_main_help(m);
3350 else
3351 show_snapshot_percpu_help(m);
3352 }
3353 #else
3354 /* Should never be called */
3355 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3356 #endif
3357
3358 static int s_show(struct seq_file *m, void *v)
3359 {
3360 struct trace_iterator *iter = v;
3361 int ret;
3362
3363 if (iter->ent == NULL) {
3364 if (iter->tr) {
3365 seq_printf(m, "# tracer: %s\n", iter->trace->name);
3366 seq_puts(m, "#\n");
3367 test_ftrace_alive(m);
3368 }
3369 if (iter->snapshot && trace_empty(iter))
3370 print_snapshot_help(m, iter);
3371 else if (iter->trace && iter->trace->print_header)
3372 iter->trace->print_header(m);
3373 else
3374 trace_default_header(m);
3375
3376 } else if (iter->leftover) {
3377 /*
3378 * If we filled the seq_file buffer earlier, we
3379 * want to just show it now.
3380 */
3381 ret = trace_print_seq(m, &iter->seq);
3382
3383 /* ret should this time be zero, but you never know */
3384 iter->leftover = ret;
3385
3386 } else {
3387 print_trace_line(iter);
3388 ret = trace_print_seq(m, &iter->seq);
3389 /*
3390 * If we overflow the seq_file buffer, then it will
3391 * ask us for this data again at start up.
3392 * Use that instead.
3393 * ret is 0 if seq_file write succeeded.
3394 * -1 otherwise.
3395 */
3396 iter->leftover = ret;
3397 }
3398
3399 return 0;
3400 }
3401
3402 /*
3403 * Should be used after trace_array_get(), trace_types_lock
3404 * ensures that i_cdev was already initialized.
3405 */
3406 static inline int tracing_get_cpu(struct inode *inode)
3407 {
3408 if (inode->i_cdev) /* See trace_create_cpu_file() */
3409 return (long)inode->i_cdev - 1;
3410 return RING_BUFFER_ALL_CPUS;
3411 }
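
/*
 * For illustration (the encoding is implied by the decode above): the
 * per-cpu files store "cpu + 1" in i_cdev, so an i_cdev of (void *)3 names
 * cpu 2, while a NULL i_cdev means "all cpus" (RING_BUFFER_ALL_CPUS).
 */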
3412
3413 static const struct seq_operations tracer_seq_ops = {
3414 .start = s_start,
3415 .next = s_next,
3416 .stop = s_stop,
3417 .show = s_show,
3418 };
3419
3420 static struct trace_iterator *
3421 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3422 {
3423 struct trace_array *tr = inode->i_private;
3424 struct trace_iterator *iter;
3425 int cpu;
3426
3427 if (tracing_disabled)
3428 return ERR_PTR(-ENODEV);
3429
3430 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3431 if (!iter)
3432 return ERR_PTR(-ENOMEM);
3433
3434 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3435 GFP_KERNEL);
3436 if (!iter->buffer_iter)
3437 goto release;
3438
3439 /*
3440 * We make a copy of the current tracer to avoid concurrent
3441 * changes on it while we are reading.
3442 */
3443 mutex_lock(&trace_types_lock);
3444 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3445 if (!iter->trace)
3446 goto fail;
3447
3448 *iter->trace = *tr->current_trace;
3449
3450 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3451 goto fail;
3452
3453 iter->tr = tr;
3454
3455 #ifdef CONFIG_TRACER_MAX_TRACE
3456 /* Currently only the top directory has a snapshot */
3457 if (tr->current_trace->print_max || snapshot)
3458 iter->trace_buffer = &tr->max_buffer;
3459 else
3460 #endif
3461 iter->trace_buffer = &tr->trace_buffer;
3462 iter->snapshot = snapshot;
3463 iter->pos = -1;
3464 iter->cpu_file = tracing_get_cpu(inode);
3465 mutex_init(&iter->mutex);
3466
3467 /* Notify the tracer early; before we stop tracing. */
3468 if (iter->trace && iter->trace->open)
3469 iter->trace->open(iter);
3470
3471 /* Annotate start of buffers if we had overruns */
3472 if (ring_buffer_overruns(iter->trace_buffer->buffer))
3473 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3474
3475 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3476 if (trace_clocks[tr->clock_id].in_ns)
3477 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3478
3479 /* stop the trace while dumping if we are not opening "snapshot" */
3480 if (!iter->snapshot)
3481 tracing_stop_tr(tr);
3482
3483 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3484 for_each_tracing_cpu(cpu) {
3485 iter->buffer_iter[cpu] =
3486 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3487 }
3488 ring_buffer_read_prepare_sync();
3489 for_each_tracing_cpu(cpu) {
3490 ring_buffer_read_start(iter->buffer_iter[cpu]);
3491 tracing_iter_reset(iter, cpu);
3492 }
3493 } else {
3494 cpu = iter->cpu_file;
3495 iter->buffer_iter[cpu] =
3496 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3497 ring_buffer_read_prepare_sync();
3498 ring_buffer_read_start(iter->buffer_iter[cpu]);
3499 tracing_iter_reset(iter, cpu);
3500 }
3501
3502 mutex_unlock(&trace_types_lock);
3503
3504 return iter;
3505
3506 fail:
3507 mutex_unlock(&trace_types_lock);
3508 kfree(iter->trace);
3509 kfree(iter->buffer_iter);
3510 release:
3511 seq_release_private(inode, file);
3512 return ERR_PTR(-ENOMEM);
3513 }
3514
3515 int tracing_open_generic(struct inode *inode, struct file *filp)
3516 {
3517 if (tracing_disabled)
3518 return -ENODEV;
3519
3520 filp->private_data = inode->i_private;
3521 return 0;
3522 }
3523
3524 bool tracing_is_disabled(void)
3525 {
3526 	return tracing_disabled ? true : false;
3527 }
3528
3529 /*
3530 * Open and update trace_array ref count.
3531 * Must have the current trace_array passed to it.
3532 */
3533 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3534 {
3535 struct trace_array *tr = inode->i_private;
3536
3537 if (tracing_disabled)
3538 return -ENODEV;
3539
3540 if (trace_array_get(tr) < 0)
3541 return -ENODEV;
3542
3543 filp->private_data = inode->i_private;
3544
3545 return 0;
3546 }
3547
3548 static int tracing_release(struct inode *inode, struct file *file)
3549 {
3550 struct trace_array *tr = inode->i_private;
3551 struct seq_file *m = file->private_data;
3552 struct trace_iterator *iter;
3553 int cpu;
3554
3555 if (!(file->f_mode & FMODE_READ)) {
3556 trace_array_put(tr);
3557 return 0;
3558 }
3559
3560 /* Writes do not use seq_file */
3561 iter = m->private;
3562 mutex_lock(&trace_types_lock);
3563
3564 for_each_tracing_cpu(cpu) {
3565 if (iter->buffer_iter[cpu])
3566 ring_buffer_read_finish(iter->buffer_iter[cpu]);
3567 }
3568
3569 if (iter->trace && iter->trace->close)
3570 iter->trace->close(iter);
3571
3572 if (!iter->snapshot)
3573 /* reenable tracing if it was previously enabled */
3574 tracing_start_tr(tr);
3575
3576 __trace_array_put(tr);
3577
3578 mutex_unlock(&trace_types_lock);
3579
3580 mutex_destroy(&iter->mutex);
3581 free_cpumask_var(iter->started);
3582 kfree(iter->trace);
3583 kfree(iter->buffer_iter);
3584 seq_release_private(inode, file);
3585
3586 return 0;
3587 }
3588
3589 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3590 {
3591 struct trace_array *tr = inode->i_private;
3592
3593 trace_array_put(tr);
3594 return 0;
3595 }
3596
3597 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3598 {
3599 struct trace_array *tr = inode->i_private;
3600
3601 trace_array_put(tr);
3602
3603 return single_release(inode, file);
3604 }
3605
3606 static int tracing_open(struct inode *inode, struct file *file)
3607 {
3608 struct trace_array *tr = inode->i_private;
3609 struct trace_iterator *iter;
3610 int ret = 0;
3611
3612 if (trace_array_get(tr) < 0)
3613 return -ENODEV;
3614
3615 /* If this file was open for write, then erase contents */
3616 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3617 int cpu = tracing_get_cpu(inode);
3618 struct trace_buffer *trace_buf = &tr->trace_buffer;
3619
3620 #ifdef CONFIG_TRACER_MAX_TRACE
3621 if (tr->current_trace->print_max)
3622 trace_buf = &tr->max_buffer;
3623 #endif
3624
3625 if (cpu == RING_BUFFER_ALL_CPUS)
3626 tracing_reset_online_cpus(trace_buf);
3627 else
3628 tracing_reset(trace_buf, cpu);
3629 }
3630
3631 if (file->f_mode & FMODE_READ) {
3632 iter = __tracing_open(inode, file, false);
3633 if (IS_ERR(iter))
3634 ret = PTR_ERR(iter);
3635 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3636 iter->iter_flags |= TRACE_FILE_LAT_FMT;
3637 }
3638
3639 if (ret < 0)
3640 trace_array_put(tr);
3641
3642 return ret;
3643 }
3644
3645 /*
3646 * Some tracers are not suitable for instance buffers.
3647 * A tracer is always available for the global array (toplevel)
3648 * or if it explicitly states that it is.
3649 */
3650 static bool
3651 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3652 {
3653 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3654 }
3655
3656 /* Find the next tracer that this trace array may use */
3657 static struct tracer *
3658 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3659 {
3660 while (t && !trace_ok_for_array(t, tr))
3661 t = t->next;
3662
3663 return t;
3664 }
3665
3666 static void *
3667 t_next(struct seq_file *m, void *v, loff_t *pos)
3668 {
3669 struct trace_array *tr = m->private;
3670 struct tracer *t = v;
3671
3672 (*pos)++;
3673
3674 if (t)
3675 t = get_tracer_for_array(tr, t->next);
3676
3677 return t;
3678 }
3679
3680 static void *t_start(struct seq_file *m, loff_t *pos)
3681 {
3682 struct trace_array *tr = m->private;
3683 struct tracer *t;
3684 loff_t l = 0;
3685
3686 mutex_lock(&trace_types_lock);
3687
3688 t = get_tracer_for_array(tr, trace_types);
3689 for (; t && l < *pos; t = t_next(m, t, &l))
3690 ;
3691
3692 return t;
3693 }
3694
3695 static void t_stop(struct seq_file *m, void *p)
3696 {
3697 mutex_unlock(&trace_types_lock);
3698 }
3699
3700 static int t_show(struct seq_file *m, void *v)
3701 {
3702 struct tracer *t = v;
3703
3704 if (!t)
3705 return 0;
3706
3707 seq_puts(m, t->name);
3708 if (t->next)
3709 seq_putc(m, ' ');
3710 else
3711 seq_putc(m, '\n');
3712
3713 return 0;
3714 }
3715
3716 static const struct seq_operations show_traces_seq_ops = {
3717 .start = t_start,
3718 .next = t_next,
3719 .stop = t_stop,
3720 .show = t_show,
3721 };
3722
3723 static int show_traces_open(struct inode *inode, struct file *file)
3724 {
3725 struct trace_array *tr = inode->i_private;
3726 struct seq_file *m;
3727 int ret;
3728
3729 if (tracing_disabled)
3730 return -ENODEV;
3731
3732 ret = seq_open(file, &show_traces_seq_ops);
3733 if (ret)
3734 return ret;
3735
3736 m = file->private_data;
3737 m->private = tr;
3738
3739 return 0;
3740 }
3741
3742 static ssize_t
3743 tracing_write_stub(struct file *filp, const char __user *ubuf,
3744 size_t count, loff_t *ppos)
3745 {
3746 return count;
3747 }
3748
3749 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3750 {
3751 int ret;
3752
3753 if (file->f_mode & FMODE_READ)
3754 ret = seq_lseek(file, offset, whence);
3755 else
3756 file->f_pos = ret = 0;
3757
3758 return ret;
3759 }
3760
3761 static const struct file_operations tracing_fops = {
3762 .open = tracing_open,
3763 .read = seq_read,
3764 .write = tracing_write_stub,
3765 .llseek = tracing_lseek,
3766 .release = tracing_release,
3767 };
3768
3769 static const struct file_operations show_traces_fops = {
3770 .open = show_traces_open,
3771 .read = seq_read,
3772 .release = seq_release,
3773 .llseek = seq_lseek,
3774 };
3775
3776 static ssize_t
3777 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3778 size_t count, loff_t *ppos)
3779 {
3780 struct trace_array *tr = file_inode(filp)->i_private;
3781 char *mask_str;
3782 int len;
3783
3784 len = snprintf(NULL, 0, "%*pb\n",
3785 cpumask_pr_args(tr->tracing_cpumask)) + 1;
3786 mask_str = kmalloc(len, GFP_KERNEL);
3787 if (!mask_str)
3788 return -ENOMEM;
3789
3790 len = snprintf(mask_str, len, "%*pb\n",
3791 cpumask_pr_args(tr->tracing_cpumask));
3792 if (len >= count) {
3793 count = -EINVAL;
3794 goto out_err;
3795 }
3796 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
3797
3798 out_err:
3799 kfree(mask_str);
3800
3801 return count;
3802 }
3803
3804 static ssize_t
3805 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3806 size_t count, loff_t *ppos)
3807 {
3808 struct trace_array *tr = file_inode(filp)->i_private;
3809 cpumask_var_t tracing_cpumask_new;
3810 int err, cpu;
3811
3812 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3813 return -ENOMEM;
3814
3815 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3816 if (err)
3817 goto err_unlock;
3818
3819 local_irq_disable();
3820 arch_spin_lock(&tr->max_lock);
3821 for_each_tracing_cpu(cpu) {
3822 /*
3823 * Increase/decrease the disabled counter if we are
3824 * about to flip a bit in the cpumask:
3825 */
3826 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3827 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3828 atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3829 ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3830 }
3831 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3832 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3833 atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3834 ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3835 }
3836 }
3837 arch_spin_unlock(&tr->max_lock);
3838 local_irq_enable();
3839
3840 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3841 free_cpumask_var(tracing_cpumask_new);
3842
3843 return count;
3844
3845 err_unlock:
3846 free_cpumask_var(tracing_cpumask_new);
3847
3848 return err;
3849 }
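/*
 * Illustrative usage of the tracing_cpumask file (a sketch; the path
 * assumes tracefs is mounted at /sys/kernel/tracing and a 4-CPU box):
 *
 *   # cat /sys/kernel/tracing/tracing_cpumask
 *   f
 *   # echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * The read side prints the mask with "%*pb" (a hex bitmap, "f" meaning
 * CPUs 0-3); the write side parses the same hex format, and the loop
 * above disables recording on CPUs leaving the mask (here CPUs 2 and 3)
 * while re-enabling it on CPUs entering the mask.
 */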
3850
3851 static const struct file_operations tracing_cpumask_fops = {
3852 .open = tracing_open_generic_tr,
3853 .read = tracing_cpumask_read,
3854 .write = tracing_cpumask_write,
3855 .release = tracing_release_generic_tr,
3856 .llseek = generic_file_llseek,
3857 };
3858
3859 static int tracing_trace_options_show(struct seq_file *m, void *v)
3860 {
3861 struct tracer_opt *trace_opts;
3862 struct trace_array *tr = m->private;
3863 u32 tracer_flags;
3864 int i;
3865
3866 mutex_lock(&trace_types_lock);
3867 tracer_flags = tr->current_trace->flags->val;
3868 trace_opts = tr->current_trace->flags->opts;
3869
3870 for (i = 0; trace_options[i]; i++) {
3871 if (tr->trace_flags & (1 << i))
3872 seq_printf(m, "%s\n", trace_options[i]);
3873 else
3874 seq_printf(m, "no%s\n", trace_options[i]);
3875 }
3876
3877 for (i = 0; trace_opts[i].name; i++) {
3878 if (tracer_flags & trace_opts[i].bit)
3879 seq_printf(m, "%s\n", trace_opts[i].name);
3880 else
3881 seq_printf(m, "no%s\n", trace_opts[i].name);
3882 }
3883 mutex_unlock(&trace_types_lock);
3884
3885 return 0;
3886 }
3887
3888 static int __set_tracer_option(struct trace_array *tr,
3889 struct tracer_flags *tracer_flags,
3890 struct tracer_opt *opts, int neg)
3891 {
3892 struct tracer *trace = tracer_flags->trace;
3893 int ret;
3894
3895 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3896 if (ret)
3897 return ret;
3898
3899 if (neg)
3900 tracer_flags->val &= ~opts->bit;
3901 else
3902 tracer_flags->val |= opts->bit;
3903 return 0;
3904 }
3905
3906 /* Try to assign a tracer specific option */
3907 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3908 {
3909 struct tracer *trace = tr->current_trace;
3910 struct tracer_flags *tracer_flags = trace->flags;
3911 struct tracer_opt *opts = NULL;
3912 int i;
3913
3914 for (i = 0; tracer_flags->opts[i].name; i++) {
3915 opts = &tracer_flags->opts[i];
3916
3917 if (strcmp(cmp, opts->name) == 0)
3918 return __set_tracer_option(tr, trace->flags, opts, neg);
3919 }
3920
3921 return -EINVAL;
3922 }
3923
3924 /* Some tracers require overwrite to stay enabled */
3925 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3926 {
3927 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3928 return -1;
3929
3930 return 0;
3931 }
3932
3933 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3934 {
3935 /* do nothing if flag is already set */
3936 if (!!(tr->trace_flags & mask) == !!enabled)
3937 return 0;
3938
3939 /* Give the tracer a chance to approve the change */
3940 if (tr->current_trace->flag_changed)
3941 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3942 return -EINVAL;
3943
3944 if (enabled)
3945 tr->trace_flags |= mask;
3946 else
3947 tr->trace_flags &= ~mask;
3948
3949 if (mask == TRACE_ITER_RECORD_CMD)
3950 trace_event_enable_cmd_record(enabled);
3951
3952 if (mask == TRACE_ITER_EVENT_FORK)
3953 trace_event_follow_fork(tr, enabled);
3954
3955 if (mask == TRACE_ITER_OVERWRITE) {
3956 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3957 #ifdef CONFIG_TRACER_MAX_TRACE
3958 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3959 #endif
3960 }
3961
3962 if (mask == TRACE_ITER_PRINTK) {
3963 trace_printk_start_stop_comm(enabled);
3964 trace_printk_control(enabled);
3965 }
3966
3967 return 0;
3968 }
3969
3970 static int trace_set_options(struct trace_array *tr, char *option)
3971 {
3972 char *cmp;
3973 int neg = 0;
3974 int ret = -ENODEV;
3975 int i;
3976 size_t orig_len = strlen(option);
3977
3978 cmp = strstrip(option);
3979
3980 if (strncmp(cmp, "no", 2) == 0) {
3981 neg = 1;
3982 cmp += 2;
3983 }
3984
3985 mutex_lock(&trace_types_lock);
3986
3987 for (i = 0; trace_options[i]; i++) {
3988 if (strcmp(cmp, trace_options[i]) == 0) {
3989 ret = set_tracer_flag(tr, 1 << i, !neg);
3990 break;
3991 }
3992 }
3993
3994 /* If no option could be set, test the specific tracer options */
3995 if (!trace_options[i])
3996 ret = set_tracer_option(tr, cmp, neg);
3997
3998 mutex_unlock(&trace_types_lock);
3999
4000 /*
4001 * If the first trailing whitespace is replaced with '\0' by strstrip,
4002 * turn it back into a space.
4003 */
4004 if (orig_len > strlen(option))
4005 option[strlen(option)] = ' ';
4006
4007 return ret;
4008 }
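/*
 * Example of what trace_set_options() handles (a sketch; the string
 * normally arrives through the trace_options file handled below):
 *
 *   # echo noprint-parent > trace_options
 *
 * The leading "no" is stripped, "print-parent" is found in trace_options[]
 * and the corresponding TRACE_ITER_* bit is cleared via set_tracer_flag().
 * A name that is not a core option falls through to set_tracer_option()
 * and is matched against the current tracer's private flags instead.
 */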
4009
4010 static void __init apply_trace_boot_options(void)
4011 {
4012 char *buf = trace_boot_options_buf;
4013 char *option;
4014
4015 while (true) {
4016 option = strsep(&buf, ",");
4017
4018 if (!option)
4019 break;
4020
4021 if (*option)
4022 trace_set_options(&global_trace, option);
4023
4024 /* Put back the comma to allow this to be called again */
4025 if (buf)
4026 *(buf - 1) = ',';
4027 }
4028 }
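/*
 * Example boot usage (a sketch): with "trace_options=sym-addr,noprint-parent"
 * on the kernel command line, trace_boot_options_buf holds that string and
 * the loop above hands "sym-addr" and "noprint-parent" in turn to
 * trace_set_options() for the global_trace array, restoring the commas so
 * the buffer can be parsed again on a later call.
 */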
4029
4030 static ssize_t
4031 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4032 size_t cnt, loff_t *ppos)
4033 {
4034 struct seq_file *m = filp->private_data;
4035 struct trace_array *tr = m->private;
4036 char buf[64];
4037 int ret;
4038
4039 if (cnt >= sizeof(buf))
4040 return -EINVAL;
4041
4042 if (copy_from_user(buf, ubuf, cnt))
4043 return -EFAULT;
4044
4045 buf[cnt] = 0;
4046
4047 ret = trace_set_options(tr, buf);
4048 if (ret < 0)
4049 return ret;
4050
4051 *ppos += cnt;
4052
4053 return cnt;
4054 }
4055
4056 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4057 {
4058 struct trace_array *tr = inode->i_private;
4059 int ret;
4060
4061 if (tracing_disabled)
4062 return -ENODEV;
4063
4064 if (trace_array_get(tr) < 0)
4065 return -ENODEV;
4066
4067 ret = single_open(file, tracing_trace_options_show, inode->i_private);
4068 if (ret < 0)
4069 trace_array_put(tr);
4070
4071 return ret;
4072 }
4073
4074 static const struct file_operations tracing_iter_fops = {
4075 .open = tracing_trace_options_open,
4076 .read = seq_read,
4077 .llseek = seq_lseek,
4078 .release = tracing_single_release_tr,
4079 .write = tracing_trace_options_write,
4080 };
4081
4082 static const char readme_msg[] =
4083 "tracing mini-HOWTO:\n\n"
4084 "# echo 0 > tracing_on : quick way to disable tracing\n"
4085 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4086 " Important files:\n"
4087 " trace\t\t\t- The static contents of the buffer\n"
4088 "\t\t\t To clear the buffer write into this file: echo > trace\n"
4089 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4090 " current_tracer\t- function and latency tracers\n"
4091 " available_tracers\t- list of configured tracers for current_tracer\n"
4092 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
4093 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
4094 " trace_clock\t\t- change the clock used to order events\n"
4095 " local: Per cpu clock but may not be synced across CPUs\n"
4096 " global: Synced across CPUs but slows tracing down.\n"
4097 " counter: Not a clock, but just an increment\n"
4098 " uptime: Jiffy counter from time of boot\n"
4099 " perf: Same clock that perf events use\n"
4100 #ifdef CONFIG_X86_64
4101 " x86-tsc: TSC cycle counter\n"
4102 #endif
4103 "\n trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
4104 " tracing_cpumask\t- Limit which CPUs to trace\n"
4105 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4106 "\t\t\t Remove sub-buffer with rmdir\n"
4107 " trace_options\t\t- Set format or modify how tracing happens\n"
4108 "\t\t\t Disable an option by prefixing 'no' to the\n"
4109 "\t\t\t option name\n"
4110 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4111 #ifdef CONFIG_DYNAMIC_FTRACE
4112 "\n available_filter_functions - list of functions that can be filtered on\n"
4113 " set_ftrace_filter\t- echo function name in here to only trace these\n"
4114 "\t\t\t functions\n"
4115 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4116 "\t modules: Can select a group via module\n"
4117 "\t Format: :mod:<module-name>\n"
4118 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
4119 "\t triggers: a command to perform when function is hit\n"
4120 "\t Format: <function>:<trigger>[:count]\n"
4121 "\t trigger: traceon, traceoff\n"
4122 "\t\t enable_event:<system>:<event>\n"
4123 "\t\t disable_event:<system>:<event>\n"
4124 #ifdef CONFIG_STACKTRACE
4125 "\t\t stacktrace\n"
4126 #endif
4127 #ifdef CONFIG_TRACER_SNAPSHOT
4128 "\t\t snapshot\n"
4129 #endif
4130 "\t\t dump\n"
4131 "\t\t cpudump\n"
4132 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
4133 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
4134 "\t The first one will disable tracing every time do_fault is hit\n"
4135 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
4136 "\t The first time do_trap is hit and it disables tracing, the\n"
4137 "\t counter will decrement to 2. If tracing is already disabled,\n"
4138 "\t the counter will not decrement. It only decrements when the\n"
4139 "\t trigger did work\n"
4140 "\t To remove a trigger without a count:\n"
4141 "\t echo '!<function>:<trigger>' > set_ftrace_filter\n"
4142 "\t To remove a trigger with a count:\n"
4143 "\t echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4144 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
4145 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4146 "\t modules: Can select a group via module command :mod:\n"
4147 "\t Does not accept triggers\n"
4148 #endif /* CONFIG_DYNAMIC_FTRACE */
4149 #ifdef CONFIG_FUNCTION_TRACER
4150 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4151 "\t\t (function)\n"
4152 #endif
4153 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4154 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4155 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4156 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4157 #endif
4158 #ifdef CONFIG_TRACER_SNAPSHOT
4159 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
4160 "\t\t\t snapshot buffer. Read the contents for more\n"
4161 "\t\t\t information\n"
4162 #endif
4163 #ifdef CONFIG_STACK_TRACER
4164 " stack_trace\t\t- Shows the max stack trace when active\n"
4165 " stack_max_size\t- Shows current max stack size that was traced\n"
4166 "\t\t\t Write into this file to reset the max size (trigger a\n"
4167 "\t\t\t new trace)\n"
4168 #ifdef CONFIG_DYNAMIC_FTRACE
4169 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4170 "\t\t\t traces\n"
4171 #endif
4172 #endif /* CONFIG_STACK_TRACER */
4173 #ifdef CONFIG_KPROBE_EVENT
4174 " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4175 "\t\t\t Write into this file to define/undefine new trace events.\n"
4176 #endif
4177 #ifdef CONFIG_UPROBE_EVENT
4178 " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4179 "\t\t\t Write into this file to define/undefine new trace events.\n"
4180 #endif
4181 #if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
4182 "\t accepts: event-definitions (one definition per line)\n"
4183 "\t Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4184 "\t -:[<group>/]<event>\n"
4185 #ifdef CONFIG_KPROBE_EVENT
4186 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4187 #endif
4188 #ifdef CONFIG_UPROBE_EVENT
4189 "\t place: <path>:<offset>\n"
4190 #endif
4191 "\t args: <name>=fetcharg[:type]\n"
4192 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4193 "\t $stack<index>, $stack, $retval, $comm\n"
4194 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4195 "\t b<bit-width>@<bit-offset>/<container-size>\n"
4196 #endif
4197 " events/\t\t- Directory containing all trace event subsystems:\n"
4198 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4199 " events/<system>/\t- Directory containing all trace events for <system>:\n"
4200 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4201 "\t\t\t events\n"
4202 " filter\t\t- If set, only events passing filter are traced\n"
4203 " events/<system>/<event>/\t- Directory containing control files for\n"
4204 "\t\t\t <event>:\n"
4205 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4206 " filter\t\t- If set, only events passing filter are traced\n"
4207 " trigger\t\t- If set, a command to perform when event is hit\n"
4208 "\t Format: <trigger>[:count][if <filter>]\n"
4209 "\t trigger: traceon, traceoff\n"
4210 "\t enable_event:<system>:<event>\n"
4211 "\t disable_event:<system>:<event>\n"
4212 #ifdef CONFIG_HIST_TRIGGERS
4213 "\t enable_hist:<system>:<event>\n"
4214 "\t disable_hist:<system>:<event>\n"
4215 #endif
4216 #ifdef CONFIG_STACKTRACE
4217 "\t\t stacktrace\n"
4218 #endif
4219 #ifdef CONFIG_TRACER_SNAPSHOT
4220 "\t\t snapshot\n"
4221 #endif
4222 #ifdef CONFIG_HIST_TRIGGERS
4223 "\t\t hist (see below)\n"
4224 #endif
4225 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
4226 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
4227 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4228 "\t events/block/block_unplug/trigger\n"
4229 "\t The first disables tracing every time block_unplug is hit.\n"
4230 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
4231 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
4232 "\t is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4233 "\t Like function triggers, the counter is only decremented if it\n"
4234 "\t enabled or disabled tracing.\n"
4235 "\t To remove a trigger without a count:\n"
4236 "\t echo '!<trigger>' > <system>/<event>/trigger\n"
4237 "\t To remove a trigger with a count:\n"
4238 "\t echo '!<trigger>:0' > <system>/<event>/trigger\n"
4239 "\t Filters can be ignored when removing a trigger.\n"
4240 #ifdef CONFIG_HIST_TRIGGERS
4241 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
4242 "\t Format: hist:keys=<field1[,field2,...]>\n"
4243 "\t [:values=<field1[,field2,...]>]\n"
4244 "\t [:sort=<field1[,field2,...]>]\n"
4245 "\t [:size=#entries]\n"
4246 "\t [:pause][:continue][:clear]\n"
4247 "\t [:name=histname1]\n"
4248 "\t [if <filter>]\n\n"
4249 "\t When a matching event is hit, an entry is added to a hash\n"
4250 "\t table using the key(s) and value(s) named, and the value of a\n"
4251 "\t sum called 'hitcount' is incremented. Keys and values\n"
4252 "\t correspond to fields in the event's format description. Keys\n"
4253 "\t can be any field, or the special string 'stacktrace'.\n"
4254 "\t Compound keys consisting of up to two fields can be specified\n"
4255 "\t by the 'keys' keyword. Values must correspond to numeric\n"
4256 "\t fields. Sort keys consisting of up to two fields can be\n"
4257 "\t specified using the 'sort' keyword. The sort direction can\n"
4258 "\t be modified by appending '.descending' or '.ascending' to a\n"
4259 "\t sort field. The 'size' parameter can be used to specify more\n"
4260 "\t or fewer than the default 2048 entries for the hashtable size.\n"
4261 "\t If a hist trigger is given a name using the 'name' parameter,\n"
4262 "\t its histogram data will be shared with other triggers of the\n"
4263 "\t same name, and trigger hits will update this common data.\n\n"
4264 "\t Reading the 'hist' file for the event will dump the hash\n"
4265 "\t table in its entirety to stdout. If there are multiple hist\n"
4266 "\t triggers attached to an event, there will be a table for each\n"
4267 "\t trigger in the output. The table displayed for a named\n"
4268 "\t trigger will be the same as any other instance having the\n"
4269 "\t same name. The default format used to display a given field\n"
4270 "\t can be modified by appending any of the following modifiers\n"
4271 "\t to the field name, as applicable:\n\n"
4272 "\t .hex display a number as a hex value\n"
4273 "\t .sym display an address as a symbol\n"
4274 "\t .sym-offset display an address as a symbol and offset\n"
4275 "\t .execname display a common_pid as a program name\n"
4276 "\t .syscall display a syscall id as a syscall name\n\n"
4277 "\t .log2 display log2 value rather than raw number\n\n"
4278 "\t The 'pause' parameter can be used to pause an existing hist\n"
4279 "\t trigger or to start a hist trigger but not log any events\n"
4280 "\t until told to do so. 'continue' can be used to start or\n"
4281 "\t restart a paused hist trigger.\n\n"
4282 "\t The 'clear' parameter will clear the contents of a running\n"
4283 "\t hist trigger and leave its current paused/active state\n"
4284 "\t unchanged.\n\n"
4285 "\t The enable_hist and disable_hist triggers can be used to\n"
4286 "\t have one event conditionally start and stop another event's\n"
4287 "\t already-attached hist trigger. The syntax is analogous to\n"
4288 "\t the enable_event and disable_event triggers.\n"
4289 #endif
4290 ;
4291
4292 static ssize_t
4293 tracing_readme_read(struct file *filp, char __user *ubuf,
4294 size_t cnt, loff_t *ppos)
4295 {
4296 return simple_read_from_buffer(ubuf, cnt, ppos,
4297 readme_msg, strlen(readme_msg));
4298 }
4299
4300 static const struct file_operations tracing_readme_fops = {
4301 .open = tracing_open_generic,
4302 .read = tracing_readme_read,
4303 .llseek = generic_file_llseek,
4304 };
4305
4306 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4307 {
4308 unsigned int *ptr = v;
4309
4310 if (*pos || m->count)
4311 ptr++;
4312
4313 (*pos)++;
4314
4315 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4316 ptr++) {
4317 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4318 continue;
4319
4320 return ptr;
4321 }
4322
4323 return NULL;
4324 }
4325
4326 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4327 {
4328 void *v;
4329 loff_t l = 0;
4330
4331 preempt_disable();
4332 arch_spin_lock(&trace_cmdline_lock);
4333
4334 v = &savedcmd->map_cmdline_to_pid[0];
4335 while (l <= *pos) {
4336 v = saved_cmdlines_next(m, v, &l);
4337 if (!v)
4338 return NULL;
4339 }
4340
4341 return v;
4342 }
4343
4344 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4345 {
4346 arch_spin_unlock(&trace_cmdline_lock);
4347 preempt_enable();
4348 }
4349
4350 static int saved_cmdlines_show(struct seq_file *m, void *v)
4351 {
4352 char buf[TASK_COMM_LEN];
4353 unsigned int *pid = v;
4354
4355 __trace_find_cmdline(*pid, buf);
4356 seq_printf(m, "%d %s\n", *pid, buf);
4357 return 0;
4358 }
4359
4360 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4361 .start = saved_cmdlines_start,
4362 .next = saved_cmdlines_next,
4363 .stop = saved_cmdlines_stop,
4364 .show = saved_cmdlines_show,
4365 };
4366
4367 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4368 {
4369 if (tracing_disabled)
4370 return -ENODEV;
4371
4372 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4373 }
4374
4375 static const struct file_operations tracing_saved_cmdlines_fops = {
4376 .open = tracing_saved_cmdlines_open,
4377 .read = seq_read,
4378 .llseek = seq_lseek,
4379 .release = seq_release,
4380 };
4381
4382 static ssize_t
4383 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4384 size_t cnt, loff_t *ppos)
4385 {
4386 char buf[64];
4387 int r;
4388
4389 arch_spin_lock(&trace_cmdline_lock);
4390 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4391 arch_spin_unlock(&trace_cmdline_lock);
4392
4393 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4394 }
4395
4396 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4397 {
4398 kfree(s->saved_cmdlines);
4399 kfree(s->map_cmdline_to_pid);
4400 kfree(s);
4401 }
4402
4403 static int tracing_resize_saved_cmdlines(unsigned int val)
4404 {
4405 struct saved_cmdlines_buffer *s, *savedcmd_temp;
4406
4407 s = kmalloc(sizeof(*s), GFP_KERNEL);
4408 if (!s)
4409 return -ENOMEM;
4410
4411 if (allocate_cmdlines_buffer(val, s) < 0) {
4412 kfree(s);
4413 return -ENOMEM;
4414 }
4415
4416 arch_spin_lock(&trace_cmdline_lock);
4417 savedcmd_temp = savedcmd;
4418 savedcmd = s;
4419 arch_spin_unlock(&trace_cmdline_lock);
4420 free_saved_cmdlines_buffer(savedcmd_temp);
4421
4422 return 0;
4423 }
4424
4425 static ssize_t
4426 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4427 size_t cnt, loff_t *ppos)
4428 {
4429 unsigned long val;
4430 int ret;
4431
4432 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4433 if (ret)
4434 return ret;
4435
4436 /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4437 if (!val || val > PID_MAX_DEFAULT)
4438 return -EINVAL;
4439
4440 ret = tracing_resize_saved_cmdlines((unsigned int)val);
4441 if (ret < 0)
4442 return ret;
4443
4444 *ppos += cnt;
4445
4446 return cnt;
4447 }
4448
4449 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4450 .open = tracing_open_generic,
4451 .read = tracing_saved_cmdlines_size_read,
4452 .write = tracing_saved_cmdlines_size_write,
4453 };
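/*
 * Illustrative usage of saved_cmdlines_size (a sketch; the path assumes
 * tracefs is mounted at /sys/kernel/tracing):
 *
 *   # cat /sys/kernel/tracing/saved_cmdlines_size
 *   128
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * The write allocates a fresh saved_cmdlines_buffer sized for 1024
 * entries, swaps it in under trace_cmdline_lock and frees the old one.
 * Values must be between 1 and PID_MAX_DEFAULT.
 */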
4454
4455 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4456 static union trace_enum_map_item *
4457 update_enum_map(union trace_enum_map_item *ptr)
4458 {
4459 if (!ptr->map.enum_string) {
4460 if (ptr->tail.next) {
4461 ptr = ptr->tail.next;
4462 /* Set ptr to the next real item (skip head) */
4463 ptr++;
4464 } else
4465 return NULL;
4466 }
4467 return ptr;
4468 }
4469
4470 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4471 {
4472 union trace_enum_map_item *ptr = v;
4473
4474 /*
4475 * Paranoid! If ptr points to end, we don't want to increment past it.
4476 * This really should never happen.
4477 */
4478 ptr = update_enum_map(ptr);
4479 if (WARN_ON_ONCE(!ptr))
4480 return NULL;
4481
4482 ptr++;
4483
4484 (*pos)++;
4485
4486 ptr = update_enum_map(ptr);
4487
4488 return ptr;
4489 }
4490
4491 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4492 {
4493 union trace_enum_map_item *v;
4494 loff_t l = 0;
4495
4496 mutex_lock(&trace_enum_mutex);
4497
4498 v = trace_enum_maps;
4499 if (v)
4500 v++;
4501
4502 while (v && l < *pos) {
4503 v = enum_map_next(m, v, &l);
4504 }
4505
4506 return v;
4507 }
4508
4509 static void enum_map_stop(struct seq_file *m, void *v)
4510 {
4511 mutex_unlock(&trace_enum_mutex);
4512 }
4513
4514 static int enum_map_show(struct seq_file *m, void *v)
4515 {
4516 union trace_enum_map_item *ptr = v;
4517
4518 seq_printf(m, "%s %ld (%s)\n",
4519 ptr->map.enum_string, ptr->map.enum_value,
4520 ptr->map.system);
4521
4522 return 0;
4523 }
4524
4525 static const struct seq_operations tracing_enum_map_seq_ops = {
4526 .start = enum_map_start,
4527 .next = enum_map_next,
4528 .stop = enum_map_stop,
4529 .show = enum_map_show,
4530 };
4531
4532 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4533 {
4534 if (tracing_disabled)
4535 return -ENODEV;
4536
4537 return seq_open(filp, &tracing_enum_map_seq_ops);
4538 }
4539
4540 static const struct file_operations tracing_enum_map_fops = {
4541 .open = tracing_enum_map_open,
4542 .read = seq_read,
4543 .llseek = seq_lseek,
4544 .release = seq_release,
4545 };
4546
4547 static inline union trace_enum_map_item *
4548 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4549 {
4550 /* Return tail of array given the head */
4551 return ptr + ptr->head.length + 1;
4552 }
4553
4554 static void
4555 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4556 int len)
4557 {
4558 struct trace_enum_map **stop;
4559 struct trace_enum_map **map;
4560 union trace_enum_map_item *map_array;
4561 union trace_enum_map_item *ptr;
4562
4563 stop = start + len;
4564
4565 /*
4566 * The trace_enum_maps contains the map plus a head and tail item,
4567 * where the head holds the module and length of array, and the
4568 * tail holds a pointer to the next list.
4569 */
4570 map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4571 if (!map_array) {
4572 pr_warn("Unable to allocate trace enum mapping\n");
4573 return;
4574 }
4575
4576 mutex_lock(&trace_enum_mutex);
4577
4578 if (!trace_enum_maps)
4579 trace_enum_maps = map_array;
4580 else {
4581 ptr = trace_enum_maps;
4582 for (;;) {
4583 ptr = trace_enum_jmp_to_tail(ptr);
4584 if (!ptr->tail.next)
4585 break;
4586 ptr = ptr->tail.next;
4587
4588 }
4589 ptr->tail.next = map_array;
4590 }
4591 map_array->head.mod = mod;
4592 map_array->head.length = len;
4593 map_array++;
4594
4595 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4596 map_array->map = **map;
4597 map_array++;
4598 }
4599 memset(map_array, 0, sizeof(*map_array));
4600
4601 mutex_unlock(&trace_enum_mutex);
4602 }
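/*
 * Resulting layout of one map_array allocation (len == 3 shown purely
 * as an illustration):
 *
 *   [ head: mod, length = 3 ][ map 0 ][ map 1 ][ map 2 ][ tail: next ]
 *
 * trace_enum_jmp_to_tail() jumps from the head to the tail item
 * (ptr + length + 1), and tail.next chains this array to the array of
 * the next module that registered enum maps (or stays NULL).
 */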
4603
4604 static void trace_create_enum_file(struct dentry *d_tracer)
4605 {
4606 trace_create_file("enum_map", 0444, d_tracer,
4607 NULL, &tracing_enum_map_fops);
4608 }
4609
4610 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4611 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4612 static inline void trace_insert_enum_map_file(struct module *mod,
4613 struct trace_enum_map **start, int len) { }
4614 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4615
4616 static void trace_insert_enum_map(struct module *mod,
4617 struct trace_enum_map **start, int len)
4618 {
4619 struct trace_enum_map **map;
4620
4621 if (len <= 0)
4622 return;
4623
4624 map = start;
4625
4626 trace_event_enum_update(map, len);
4627
4628 trace_insert_enum_map_file(mod, start, len);
4629 }
4630
4631 static ssize_t
4632 tracing_saved_tgids_read(struct file *file, char __user *ubuf,
4633 size_t cnt, loff_t *ppos)
4634 {
4635 char *file_buf;
4636 char *buf;
4637 int len = 0;
4638 int pid;
4639 int i;
4640
4641 file_buf = kmalloc(SAVED_CMDLINES_DEFAULT*(16+1+16), GFP_KERNEL);
4642 if (!file_buf)
4643 return -ENOMEM;
4644
4645 buf = file_buf;
4646
4647 for (i = 0; i < SAVED_CMDLINES_DEFAULT; i++) {
4648 int tgid;
4649 int r;
4650
4651 pid = savedcmd->map_cmdline_to_pid[i];
4652 if (pid == -1 || pid == NO_CMDLINE_MAP)
4653 continue;
4654
4655 tgid = trace_find_tgid(pid);
4656 r = sprintf(buf, "%d %d\n", pid, tgid);
4657 buf += r;
4658 len += r;
4659 }
4660
4661 len = simple_read_from_buffer(ubuf, cnt, ppos,
4662 file_buf, len);
4663
4664 kfree(file_buf);
4665
4666 return len;
4667 }
4668
4669 static const struct file_operations tracing_saved_tgids_fops = {
4670 .open = tracing_open_generic,
4671 .read = tracing_saved_tgids_read,
4672 .llseek = generic_file_llseek,
4673 };
4674
4675 static ssize_t
4676 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4677 size_t cnt, loff_t *ppos)
4678 {
4679 struct trace_array *tr = filp->private_data;
4680 char buf[MAX_TRACER_SIZE+2];
4681 int r;
4682
4683 mutex_lock(&trace_types_lock);
4684 r = sprintf(buf, "%s\n", tr->current_trace->name);
4685 mutex_unlock(&trace_types_lock);
4686
4687 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4688 }
4689
4690 int tracer_init(struct tracer *t, struct trace_array *tr)
4691 {
4692 tracing_reset_online_cpus(&tr->trace_buffer);
4693 return t->init(tr);
4694 }
4695
4696 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4697 {
4698 int cpu;
4699
4700 for_each_tracing_cpu(cpu)
4701 per_cpu_ptr(buf->data, cpu)->entries = val;
4702 }
4703
4704 #ifdef CONFIG_TRACER_MAX_TRACE
4705 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4706 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4707 struct trace_buffer *size_buf, int cpu_id)
4708 {
4709 int cpu, ret = 0;
4710
4711 if (cpu_id == RING_BUFFER_ALL_CPUS) {
4712 for_each_tracing_cpu(cpu) {
4713 ret = ring_buffer_resize(trace_buf->buffer,
4714 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4715 if (ret < 0)
4716 break;
4717 per_cpu_ptr(trace_buf->data, cpu)->entries =
4718 per_cpu_ptr(size_buf->data, cpu)->entries;
4719 }
4720 } else {
4721 ret = ring_buffer_resize(trace_buf->buffer,
4722 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4723 if (ret == 0)
4724 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4725 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4726 }
4727
4728 return ret;
4729 }
4730 #endif /* CONFIG_TRACER_MAX_TRACE */
4731
4732 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4733 unsigned long size, int cpu)
4734 {
4735 int ret;
4736
4737 /*
4738 * If kernel or user changes the size of the ring buffer
4739 * we use the size that was given, and we can forget about
4740 * expanding it later.
4741 */
4742 ring_buffer_expanded = true;
4743
4744 /* May be called before buffers are initialized */
4745 if (!tr->trace_buffer.buffer)
4746 return 0;
4747
4748 ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4749 if (ret < 0)
4750 return ret;
4751
4752 #ifdef CONFIG_TRACER_MAX_TRACE
4753 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4754 !tr->current_trace->use_max_tr)
4755 goto out;
4756
4757 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4758 if (ret < 0) {
4759 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4760 &tr->trace_buffer, cpu);
4761 if (r < 0) {
4762 /*
4763 * AARGH! We are left with different
4764 * size max buffer!!!!
4765 * The max buffer is our "snapshot" buffer.
4766 * When a tracer needs a snapshot (one of the
4767 * latency tracers), it swaps the max buffer
4768 * with the saved snapshot. We succeeded in
4769 * updating the size of the main buffer, but failed to
4770 * update the size of the max buffer. But when we tried
4771 * to reset the main buffer to the original size, we
4772 * failed there too. This is very unlikely to
4773 * happen, but if it does, warn and kill all
4774 * tracing.
4775 */
4776 WARN_ON(1);
4777 tracing_disabled = 1;
4778 }
4779 return ret;
4780 }
4781
4782 if (cpu == RING_BUFFER_ALL_CPUS)
4783 set_buffer_entries(&tr->max_buffer, size);
4784 else
4785 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4786
4787 out:
4788 #endif /* CONFIG_TRACER_MAX_TRACE */
4789
4790 if (cpu == RING_BUFFER_ALL_CPUS)
4791 set_buffer_entries(&tr->trace_buffer, size);
4792 else
4793 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4794
4795 return ret;
4796 }
4797
4798 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4799 unsigned long size, int cpu_id)
4800 {
4801 int ret = size;
4802
4803 mutex_lock(&trace_types_lock);
4804
4805 if (cpu_id != RING_BUFFER_ALL_CPUS) {
4806 /* make sure this cpu is enabled in the mask */
4807 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4808 ret = -EINVAL;
4809 goto out;
4810 }
4811 }
4812
4813 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4814 if (ret < 0)
4815 ret = -ENOMEM;
4816
4817 out:
4818 mutex_unlock(&trace_types_lock);
4819
4820 return ret;
4821 }
4822
4823
4824 /**
4825 * tracing_update_buffers - used by tracing facility to expand ring buffers
4826 *
4827 * To save memory when tracing is never used on a system that has it
4828 * configured in, the ring buffers are initially set to a minimum size.
4829 * Once a user starts to use the tracing facility, the buffers need to
4830 * grow to their default size.
4831 *
4832 * This function is to be called when a tracer is about to be used.
4833 */
4834 int tracing_update_buffers(void)
4835 {
4836 int ret = 0;
4837
4838 mutex_lock(&trace_types_lock);
4839 if (!ring_buffer_expanded)
4840 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4841 RING_BUFFER_ALL_CPUS);
4842 mutex_unlock(&trace_types_lock);
4843
4844 return ret;
4845 }
4846
4847 struct trace_option_dentry;
4848
4849 static void
4850 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4851
4852 /*
4853 * Used to clear out the tracer before deletion of an instance.
4854 * Must have trace_types_lock held.
4855 */
4856 static void tracing_set_nop(struct trace_array *tr)
4857 {
4858 if (tr->current_trace == &nop_trace)
4859 return;
4860
4861 tr->current_trace->enabled--;
4862
4863 if (tr->current_trace->reset)
4864 tr->current_trace->reset(tr);
4865
4866 tr->current_trace = &nop_trace;
4867 }
4868
4869 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4870 {
4871 /* Only enable if the directory has been created already. */
4872 if (!tr->dir)
4873 return;
4874
4875 create_trace_option_files(tr, t);
4876 }
4877
4878 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4879 {
4880 struct tracer *t;
4881 #ifdef CONFIG_TRACER_MAX_TRACE
4882 bool had_max_tr;
4883 #endif
4884 int ret = 0;
4885
4886 mutex_lock(&trace_types_lock);
4887
4888 if (!ring_buffer_expanded) {
4889 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4890 RING_BUFFER_ALL_CPUS);
4891 if (ret < 0)
4892 goto out;
4893 ret = 0;
4894 }
4895
4896 for (t = trace_types; t; t = t->next) {
4897 if (strcmp(t->name, buf) == 0)
4898 break;
4899 }
4900 if (!t) {
4901 ret = -EINVAL;
4902 goto out;
4903 }
4904 if (t == tr->current_trace)
4905 goto out;
4906
4907 /* Some tracers are only allowed for the top level buffer */
4908 if (!trace_ok_for_array(t, tr)) {
4909 ret = -EINVAL;
4910 goto out;
4911 }
4912
4913 /* If trace pipe files are being read, we can't change the tracer */
4914 if (tr->current_trace->ref) {
4915 ret = -EBUSY;
4916 goto out;
4917 }
4918
4919 trace_branch_disable();
4920
4921 tr->current_trace->enabled--;
4922
4923 if (tr->current_trace->reset)
4924 tr->current_trace->reset(tr);
4925
4926 /* Current trace needs to be nop_trace before synchronize_sched */
4927 tr->current_trace = &nop_trace;
4928
4929 #ifdef CONFIG_TRACER_MAX_TRACE
4930 had_max_tr = tr->allocated_snapshot;
4931
4932 if (had_max_tr && !t->use_max_tr) {
4933 /*
4934 * We need to make sure that update_max_tr() sees that
4935 * current_trace changed to nop_trace to keep it from
4936 * swapping the buffers after we resize it.
4937 * update_max_tr() is called with interrupts disabled,
4938 * so a synchronize_sched() is sufficient.
4939 */
4940 synchronize_sched();
4941 free_snapshot(tr);
4942 }
4943 #endif
4944
4945 #ifdef CONFIG_TRACER_MAX_TRACE
4946 if (t->use_max_tr && !had_max_tr) {
4947 ret = alloc_snapshot(tr);
4948 if (ret < 0)
4949 goto out;
4950 }
4951 #endif
4952
4953 if (t->init) {
4954 ret = tracer_init(t, tr);
4955 if (ret)
4956 goto out;
4957 }
4958
4959 tr->current_trace = t;
4960 tr->current_trace->enabled++;
4961 trace_branch_enable(tr);
4962 out:
4963 mutex_unlock(&trace_types_lock);
4964
4965 return ret;
4966 }
4967
4968 static ssize_t
4969 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4970 size_t cnt, loff_t *ppos)
4971 {
4972 struct trace_array *tr = filp->private_data;
4973 char buf[MAX_TRACER_SIZE+1];
4974 int i;
4975 size_t ret;
4976 int err;
4977
4978 ret = cnt;
4979
4980 if (cnt > MAX_TRACER_SIZE)
4981 cnt = MAX_TRACER_SIZE;
4982
4983 if (copy_from_user(buf, ubuf, cnt))
4984 return -EFAULT;
4985
4986 buf[cnt] = 0;
4987
4988 /* strip trailing whitespace. */
4989 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4990 buf[i] = 0;
4991
4992 err = tracing_set_tracer(tr, buf);
4993 if (err)
4994 return err;
4995
4996 *ppos += ret;
4997
4998 return ret;
4999 }
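/*
 * Illustrative usage of the current_tracer file (a sketch; the path
 * assumes tracefs is mounted at /sys/kernel/tracing and the
 * function_graph tracer is configured in):
 *
 *   # echo function_graph > /sys/kernel/tracing/current_tracer
 *   # cat /sys/kernel/tracing/current_tracer
 *   function_graph
 *   # echo nop > /sys/kernel/tracing/current_tracer
 *
 * The write strips trailing whitespace (so the newline added by "echo"
 * is harmless) and hands the name to tracing_set_tracer(), which looks
 * it up in trace_types and swaps it in under trace_types_lock.
 */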
5000
5001 static ssize_t
5002 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5003 size_t cnt, loff_t *ppos)
5004 {
5005 char buf[64];
5006 int r;
5007
5008 r = snprintf(buf, sizeof(buf), "%ld\n",
5009 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5010 if (r > sizeof(buf))
5011 r = sizeof(buf);
5012 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5013 }
5014
5015 static ssize_t
5016 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5017 size_t cnt, loff_t *ppos)
5018 {
5019 unsigned long val;
5020 int ret;
5021
5022 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5023 if (ret)
5024 return ret;
5025
5026 *ptr = val * 1000;
5027
5028 return cnt;
5029 }
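/*
 * Units for the two helpers above (a sketch): the files they back, such
 * as tracing_thresh and the max-latency file below, are read and written
 * in microseconds, while the backing variables hold nanoseconds. For
 * example:
 *
 *   # echo 100 > tracing_thresh     (stores 100 * 1000 = 100000 ns)
 *   # cat tracing_thresh
 *   100                             (converted back via nsecs_to_usecs())
 */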
5030
5031 static ssize_t
5032 tracing_thresh_read(struct file *filp, char __user *ubuf,
5033 size_t cnt, loff_t *ppos)
5034 {
5035 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5036 }
5037
5038 static ssize_t
5039 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5040 size_t cnt, loff_t *ppos)
5041 {
5042 struct trace_array *tr = filp->private_data;
5043 int ret;
5044
5045 mutex_lock(&trace_types_lock);
5046 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5047 if (ret < 0)
5048 goto out;
5049
5050 if (tr->current_trace->update_thresh) {
5051 ret = tr->current_trace->update_thresh(tr);
5052 if (ret < 0)
5053 goto out;
5054 }
5055
5056 ret = cnt;
5057 out:
5058 mutex_unlock(&trace_types_lock);
5059
5060 return ret;
5061 }
5062
5063 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5064
5065 static ssize_t
5066 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5067 size_t cnt, loff_t *ppos)
5068 {
5069 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5070 }
5071
5072 static ssize_t
5073 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5074 size_t cnt, loff_t *ppos)
5075 {
5076 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5077 }
5078
5079 #endif
5080
5081 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5082 {
5083 struct trace_array *tr = inode->i_private;
5084 struct trace_iterator *iter;
5085 int ret = 0;
5086
5087 if (tracing_disabled)
5088 return -ENODEV;
5089
5090 if (trace_array_get(tr) < 0)
5091 return -ENODEV;
5092
5093 mutex_lock(&trace_types_lock);
5094
5095 /* create a buffer to store the information to pass to userspace */
5096 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5097 if (!iter) {
5098 ret = -ENOMEM;
5099 __trace_array_put(tr);
5100 goto out;
5101 }
5102
5103 trace_seq_init(&iter->seq);
5104 iter->trace = tr->current_trace;
5105
5106 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5107 ret = -ENOMEM;
5108 goto fail;
5109 }
5110
5111 /* trace pipe does not show start of buffer */
5112 cpumask_setall(iter->started);
5113
5114 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5115 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5116
5117 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5118 if (trace_clocks[tr->clock_id].in_ns)
5119 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5120
5121 iter->tr = tr;
5122 iter->trace_buffer = &tr->trace_buffer;
5123 iter->cpu_file = tracing_get_cpu(inode);
5124 mutex_init(&iter->mutex);
5125 filp->private_data = iter;
5126
5127 if (iter->trace->pipe_open)
5128 iter->trace->pipe_open(iter);
5129
5130 nonseekable_open(inode, filp);
5131
5132 tr->current_trace->ref++;
5133 out:
5134 mutex_unlock(&trace_types_lock);
5135 return ret;
5136
5137 fail:
5138 kfree(iter->trace);
5139 kfree(iter);
5140 __trace_array_put(tr);
5141 mutex_unlock(&trace_types_lock);
5142 return ret;
5143 }
5144
5145 static int tracing_release_pipe(struct inode *inode, struct file *file)
5146 {
5147 struct trace_iterator *iter = file->private_data;
5148 struct trace_array *tr = inode->i_private;
5149
5150 mutex_lock(&trace_types_lock);
5151
5152 tr->current_trace->ref--;
5153
5154 if (iter->trace->pipe_close)
5155 iter->trace->pipe_close(iter);
5156
5157 mutex_unlock(&trace_types_lock);
5158
5159 free_cpumask_var(iter->started);
5160 mutex_destroy(&iter->mutex);
5161 kfree(iter);
5162
5163 trace_array_put(tr);
5164
5165 return 0;
5166 }
5167
5168 static unsigned int
5169 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5170 {
5171 struct trace_array *tr = iter->tr;
5172
5173 /* Iterators are static, they should be filled or empty */
5174 if (trace_buffer_iter(iter, iter->cpu_file))
5175 return POLLIN | POLLRDNORM;
5176
5177 if (tr->trace_flags & TRACE_ITER_BLOCK)
5178 /*
5179 * Always select as readable when in blocking mode
5180 */
5181 return POLLIN | POLLRDNORM;
5182 else
5183 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5184 filp, poll_table);
5185 }
5186
5187 static unsigned int
5188 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5189 {
5190 struct trace_iterator *iter = filp->private_data;
5191
5192 return trace_poll(iter, filp, poll_table);
5193 }
5194
5195 /* Must be called with iter->mutex held. */
5196 static int tracing_wait_pipe(struct file *filp)
5197 {
5198 struct trace_iterator *iter = filp->private_data;
5199 int ret;
5200
5201 while (trace_empty(iter)) {
5202
5203 if ((filp->f_flags & O_NONBLOCK)) {
5204 return -EAGAIN;
5205 }
5206
5207 /*
5208 * We block until we read something. If tracing is disabled
5209 * but we have never read anything, we still block. This
5210 * allows a user to cat this file, and then enable tracing.
5211 * But after we have read something, we give an EOF when
5212 * tracing is again disabled.
5213 *
5214 * iter->pos will be 0 if we haven't read anything.
5215 */
5216 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5217 break;
5218
5219 mutex_unlock(&iter->mutex);
5220
5221 ret = wait_on_pipe(iter, false);
5222
5223 mutex_lock(&iter->mutex);
5224
5225 if (ret)
5226 return ret;
5227 }
5228
5229 return 1;
5230 }
5231
5232 /*
5233 * Consumer reader.
5234 */
5235 static ssize_t
5236 tracing_read_pipe(struct file *filp, char __user *ubuf,
5237 size_t cnt, loff_t *ppos)
5238 {
5239 struct trace_iterator *iter = filp->private_data;
5240 ssize_t sret;
5241
5242 /*
5243 * Avoid more than one consumer on a single file descriptor.
5244 * This is just a matter of trace coherency: the ring buffer itself
5245 * is protected.
5246 */
5247 mutex_lock(&iter->mutex);
5248
5249 /* return any leftover data */
5250 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5251 if (sret != -EBUSY)
5252 goto out;
5253
5254 trace_seq_init(&iter->seq);
5255
5256 if (iter->trace->read) {
5257 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5258 if (sret)
5259 goto out;
5260 }
5261
5262 waitagain:
5263 sret = tracing_wait_pipe(filp);
5264 if (sret <= 0)
5265 goto out;
5266
5267 /* stop when tracing is finished */
5268 if (trace_empty(iter)) {
5269 sret = 0;
5270 goto out;
5271 }
5272
5273 if (cnt >= PAGE_SIZE)
5274 cnt = PAGE_SIZE - 1;
5275
5276 /* reset all but tr, trace, and overruns */
5277 memset(&iter->seq, 0,
5278 sizeof(struct trace_iterator) -
5279 offsetof(struct trace_iterator, seq));
5280 cpumask_clear(iter->started);
5281 iter->pos = -1;
5282
5283 trace_event_read_lock();
5284 trace_access_lock(iter->cpu_file);
5285 while (trace_find_next_entry_inc(iter) != NULL) {
5286 enum print_line_t ret;
5287 int save_len = iter->seq.seq.len;
5288
5289 ret = print_trace_line(iter);
5290 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5291 /* don't print partial lines */
5292 iter->seq.seq.len = save_len;
5293 break;
5294 }
5295 if (ret != TRACE_TYPE_NO_CONSUME)
5296 trace_consume(iter);
5297
5298 if (trace_seq_used(&iter->seq) >= cnt)
5299 break;
5300
5301 /*
5302 * The full flag being set means we reached the trace_seq buffer
5303 * size and we should have left via the partial-line condition above.
5304 * One of the trace_seq_* functions is not being used properly.
5305 */
5306 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5307 iter->ent->type);
5308 }
5309 trace_access_unlock(iter->cpu_file);
5310 trace_event_read_unlock();
5311
5312 /* Now copy what we have to the user */
5313 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5314 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5315 trace_seq_init(&iter->seq);
5316
5317 /*
5318 * If there was nothing to send to user, in spite of consuming trace
5319 * entries, go back to wait for more entries.
5320 */
5321 if (sret == -EBUSY)
5322 goto waitagain;
5323
5324 out:
5325 mutex_unlock(&iter->mutex);
5326
5327 return sret;
5328 }
5329
5330 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5331 unsigned int idx)
5332 {
5333 __free_page(spd->pages[idx]);
5334 }
5335
5336 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5337 .can_merge = 0,
5338 .confirm = generic_pipe_buf_confirm,
5339 .release = generic_pipe_buf_release,
5340 .steal = generic_pipe_buf_steal,
5341 .get = generic_pipe_buf_get,
5342 };
5343
5344 static size_t
5345 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5346 {
5347 size_t count;
5348 int save_len;
5349 int ret;
5350
5351 /* Seq buffer is page-sized, exactly what we need. */
5352 for (;;) {
5353 save_len = iter->seq.seq.len;
5354 ret = print_trace_line(iter);
5355
5356 if (trace_seq_has_overflowed(&iter->seq)) {
5357 iter->seq.seq.len = save_len;
5358 break;
5359 }
5360
5361 /*
5362 * This should not be hit, because TRACE_TYPE_PARTIAL_LINE should
5363 * only be returned if iter->seq overflowed. But check it
5364 * anyway to be safe.
5365 */
5366 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5367 iter->seq.seq.len = save_len;
5368 break;
5369 }
5370
5371 count = trace_seq_used(&iter->seq) - save_len;
5372 if (rem < count) {
5373 rem = 0;
5374 iter->seq.seq.len = save_len;
5375 break;
5376 }
5377
5378 if (ret != TRACE_TYPE_NO_CONSUME)
5379 trace_consume(iter);
5380 rem -= count;
5381 if (!trace_find_next_entry_inc(iter)) {
5382 rem = 0;
5383 iter->ent = NULL;
5384 break;
5385 }
5386 }
5387
5388 return rem;
5389 }
5390
5391 static ssize_t tracing_splice_read_pipe(struct file *filp,
5392 loff_t *ppos,
5393 struct pipe_inode_info *pipe,
5394 size_t len,
5395 unsigned int flags)
5396 {
5397 struct page *pages_def[PIPE_DEF_BUFFERS];
5398 struct partial_page partial_def[PIPE_DEF_BUFFERS];
5399 struct trace_iterator *iter = filp->private_data;
5400 struct splice_pipe_desc spd = {
5401 .pages = pages_def,
5402 .partial = partial_def,
5403 .nr_pages = 0, /* This gets updated below. */
5404 .nr_pages_max = PIPE_DEF_BUFFERS,
5405 .flags = flags,
5406 .ops = &tracing_pipe_buf_ops,
5407 .spd_release = tracing_spd_release_pipe,
5408 };
5409 ssize_t ret;
5410 size_t rem;
5411 unsigned int i;
5412
5413 if (splice_grow_spd(pipe, &spd))
5414 return -ENOMEM;
5415
5416 mutex_lock(&iter->mutex);
5417
5418 if (iter->trace->splice_read) {
5419 ret = iter->trace->splice_read(iter, filp,
5420 ppos, pipe, len, flags);
5421 if (ret)
5422 goto out_err;
5423 }
5424
5425 ret = tracing_wait_pipe(filp);
5426 if (ret <= 0)
5427 goto out_err;
5428
5429 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5430 ret = -EFAULT;
5431 goto out_err;
5432 }
5433
5434 trace_event_read_lock();
5435 trace_access_lock(iter->cpu_file);
5436
5437 /* Fill as many pages as possible. */
5438 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5439 spd.pages[i] = alloc_page(GFP_KERNEL);
5440 if (!spd.pages[i])
5441 break;
5442
5443 rem = tracing_fill_pipe_page(rem, iter);
5444
5445 /* Copy the data into the page, so we can start over. */
5446 ret = trace_seq_to_buffer(&iter->seq,
5447 page_address(spd.pages[i]),
5448 trace_seq_used(&iter->seq));
5449 if (ret < 0) {
5450 __free_page(spd.pages[i]);
5451 break;
5452 }
5453 spd.partial[i].offset = 0;
5454 spd.partial[i].len = trace_seq_used(&iter->seq);
5455
5456 trace_seq_init(&iter->seq);
5457 }
5458
5459 trace_access_unlock(iter->cpu_file);
5460 trace_event_read_unlock();
5461 mutex_unlock(&iter->mutex);
5462
5463 spd.nr_pages = i;
5464
5465 if (i)
5466 ret = splice_to_pipe(pipe, &spd);
5467 else
5468 ret = 0;
5469 out:
5470 splice_shrink_spd(&spd);
5471 return ret;
5472
5473 out_err:
5474 mutex_unlock(&iter->mutex);
5475 goto out;
5476 }
5477
5478 static ssize_t
5479 tracing_entries_read(struct file *filp, char __user *ubuf,
5480 size_t cnt, loff_t *ppos)
5481 {
5482 struct inode *inode = file_inode(filp);
5483 struct trace_array *tr = inode->i_private;
5484 int cpu = tracing_get_cpu(inode);
5485 char buf[64];
5486 int r = 0;
5487 ssize_t ret;
5488
5489 mutex_lock(&trace_types_lock);
5490
5491 if (cpu == RING_BUFFER_ALL_CPUS) {
5492 int cpu, buf_size_same;
5493 unsigned long size;
5494
5495 size = 0;
5496 buf_size_same = 1;
5497 /* check if all cpu sizes are same */
5498 for_each_tracing_cpu(cpu) {
5499 /* fill in the size from first enabled cpu */
5500 if (size == 0)
5501 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5502 if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5503 buf_size_same = 0;
5504 break;
5505 }
5506 }
5507
5508 if (buf_size_same) {
5509 if (!ring_buffer_expanded)
5510 r = sprintf(buf, "%lu (expanded: %lu)\n",
5511 size >> 10,
5512 trace_buf_size >> 10);
5513 else
5514 r = sprintf(buf, "%lu\n", size >> 10);
5515 } else
5516 r = sprintf(buf, "X\n");
5517 } else
5518 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5519
5520 mutex_unlock(&trace_types_lock);
5521
5522 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5523 return ret;
5524 }
5525
5526 static ssize_t
5527 tracing_entries_write(struct file *filp, const char __user *ubuf,
5528 size_t cnt, loff_t *ppos)
5529 {
5530 struct inode *inode = file_inode(filp);
5531 struct trace_array *tr = inode->i_private;
5532 unsigned long val;
5533 int ret;
5534
5535 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5536 if (ret)
5537 return ret;
5538
5539 /* must have at least 1 entry */
5540 if (!val)
5541 return -EINVAL;
5542
5543 /* value is in KB */
5544 val <<= 10;
5545 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5546 if (ret < 0)
5547 return ret;
5548
5549 *ppos += cnt;
5550
5551 return cnt;
5552 }
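/*
 * Illustrative usage of buffer_size_kb (a sketch; paths assume tracefs
 * is mounted at /sys/kernel/tracing):
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *   # echo 1024 > /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 *
 * The value is in KB, so "val <<= 10" converts it to bytes before
 * tracing_resize_ring_buffer(). The top-level file resizes every CPU
 * (tracing_get_cpu() returns RING_BUFFER_ALL_CPUS there); the per_cpu
 * variant resizes only that CPU's buffer.
 */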
5553
5554 static ssize_t
5555 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5556 size_t cnt, loff_t *ppos)
5557 {
5558 struct trace_array *tr = filp->private_data;
5559 char buf[64];
5560 int r, cpu;
5561 unsigned long size = 0, expanded_size = 0;
5562
5563 mutex_lock(&trace_types_lock);
5564 for_each_tracing_cpu(cpu) {
5565 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5566 if (!ring_buffer_expanded)
5567 expanded_size += trace_buf_size >> 10;
5568 }
5569 if (ring_buffer_expanded)
5570 r = sprintf(buf, "%lu\n", size);
5571 else
5572 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5573 mutex_unlock(&trace_types_lock);
5574
5575 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5576 }
5577
5578 static ssize_t
5579 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5580 size_t cnt, loff_t *ppos)
5581 {
5582 /*
5583 	 * There is no need to read what the user has written; this function
5584 	 * exists only so that using "echo" on the file does not return an error.
5585 */
5586
5587 *ppos += cnt;
5588
5589 return cnt;
5590 }
5591
5592 static int
5593 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5594 {
5595 struct trace_array *tr = inode->i_private;
5596
5597 /* disable tracing ? */
5598 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5599 tracer_tracing_off(tr);
5600 /* resize the ring buffer to 0 */
5601 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5602
5603 trace_array_put(tr);
5604
5605 return 0;
5606 }
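
/*
 * Usage note for the free_buffer file above: the write itself is a no-op
 * (e.g. "echo > free_buffer"); the real work happens on release, which
 * shrinks the buffers to zero and, if TRACE_ITER_STOP_ON_FREE is set,
 * turns tracing off first.
 */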
5607
5608 static ssize_t
5609 tracing_mark_write(struct file *filp, const char __user *ubuf,
5610 size_t cnt, loff_t *fpos)
5611 {
5612 unsigned long addr = (unsigned long)ubuf;
5613 struct trace_array *tr = filp->private_data;
5614 struct ring_buffer_event *event;
5615 struct ring_buffer *buffer;
5616 struct print_entry *entry;
5617 unsigned long irq_flags;
5618 struct page *pages[2];
5619 void *map_page[2];
5620 int nr_pages = 1;
5621 ssize_t written;
5622 int offset;
5623 int size;
5624 int len;
5625 int ret;
5626 int i;
5627
5628 if (tracing_disabled)
5629 return -EINVAL;
5630
5631 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5632 return -EINVAL;
5633
5634 if (cnt > TRACE_BUF_SIZE)
5635 cnt = TRACE_BUF_SIZE;
5636
5637 /*
5638 * Userspace is injecting traces into the kernel trace buffer.
5639 	 * We want to be as non-intrusive as possible.
5640 * To do so, we do not want to allocate any special buffers
5641 * or take any locks, but instead write the userspace data
5642 * straight into the ring buffer.
5643 *
5644 	 * First we need to pin the userspace buffer into memory.
5645 	 * Most likely it already is, because the caller just referenced it,
5646 	 * but there is no guarantee. By using get_user_pages_fast()
5647 * and kmap_atomic/kunmap_atomic() we can get access to the
5648 * pages directly. We then write the data directly into the
5649 * ring buffer.
5650 */
5651 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5652
5653 /* check if we cross pages */
5654 if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5655 nr_pages = 2;
5656
5657 offset = addr & (PAGE_SIZE - 1);
5658 addr &= PAGE_MASK;
5659
5660 ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5661 if (ret < nr_pages) {
5662 while (--ret >= 0)
5663 put_page(pages[ret]);
5664 written = -EFAULT;
5665 goto out;
5666 }
5667
5668 for (i = 0; i < nr_pages; i++)
5669 map_page[i] = kmap_atomic(pages[i]);
5670
5671 local_save_flags(irq_flags);
5672 size = sizeof(*entry) + cnt + 2; /* possible \n added */
5673 buffer = tr->trace_buffer.buffer;
5674 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5675 irq_flags, preempt_count());
5676 if (!event) {
5677 /* Ring buffer disabled, return as if not open for write */
5678 written = -EBADF;
5679 goto out_unlock;
5680 }
5681
5682 entry = ring_buffer_event_data(event);
5683 entry->ip = _THIS_IP_;
5684
5685 if (nr_pages == 2) {
5686 len = PAGE_SIZE - offset;
5687 memcpy(&entry->buf, map_page[0] + offset, len);
5688 memcpy(&entry->buf[len], map_page[1], cnt - len);
5689 } else
5690 memcpy(&entry->buf, map_page[0] + offset, cnt);
5691
5692 if (entry->buf[cnt - 1] != '\n') {
5693 entry->buf[cnt] = '\n';
5694 entry->buf[cnt + 1] = '\0';
5695 } else
5696 entry->buf[cnt] = '\0';
5697
5698 __buffer_unlock_commit(buffer, event);
5699
5700 written = cnt;
5701
5702 *fpos += written;
5703
5704 out_unlock:
5705 for (i = nr_pages - 1; i >= 0; i--) {
5706 kunmap_atomic(map_page[i]);
5707 put_page(pages[i]);
5708 }
5709 out:
5710 return written;
5711 }
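
/*
 * Usage note for the trace_marker file implemented above: user space can
 * annotate the trace with, e.g.,
 *
 *   # echo "hello world" > /sys/kernel/tracing/trace_marker
 *
 * which shows up in the trace output as a print event at the point of the
 * write, provided the "markers" trace option is enabled.
 */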
5712
5713 static int tracing_clock_show(struct seq_file *m, void *v)
5714 {
5715 struct trace_array *tr = m->private;
5716 int i;
5717
5718 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5719 seq_printf(m,
5720 "%s%s%s%s", i ? " " : "",
5721 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5722 i == tr->clock_id ? "]" : "");
5723 seq_putc(m, '\n');
5724
5725 return 0;
5726 }
5727
5728 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5729 {
5730 int i;
5731
5732 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5733 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5734 break;
5735 }
5736 if (i == ARRAY_SIZE(trace_clocks))
5737 return -EINVAL;
5738
5739 mutex_lock(&trace_types_lock);
5740
5741 tr->clock_id = i;
5742
5743 ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5744
5745 /*
5746 * New clock may not be consistent with the previous clock.
5747 * Reset the buffer so that it doesn't have incomparable timestamps.
5748 */
5749 tracing_reset_online_cpus(&tr->trace_buffer);
5750
5751 #ifdef CONFIG_TRACER_MAX_TRACE
5752 if (tr->max_buffer.buffer)
5753 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5754 tracing_reset_online_cpus(&tr->max_buffer);
5755 #endif
5756
5757 mutex_unlock(&trace_types_lock);
5758
5759 return 0;
5760 }
5761
5762 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5763 size_t cnt, loff_t *fpos)
5764 {
5765 struct seq_file *m = filp->private_data;
5766 struct trace_array *tr = m->private;
5767 char buf[64];
5768 const char *clockstr;
5769 int ret;
5770
5771 if (cnt >= sizeof(buf))
5772 return -EINVAL;
5773
5774 if (copy_from_user(buf, ubuf, cnt))
5775 return -EFAULT;
5776
5777 buf[cnt] = 0;
5778
5779 clockstr = strstrip(buf);
5780
5781 ret = tracing_set_clock(tr, clockstr);
5782 if (ret)
5783 return ret;
5784
5785 *fpos += cnt;
5786
5787 return cnt;
5788 }
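
/*
 * Example interaction with the trace_clock file (the exact clock list
 * depends on the kernel configuration):
 *
 *   # cat trace_clock
 *   [local] global counter ...
 *   # echo global > trace_clock
 *
 * The clock currently in use is shown in brackets; switching clocks resets
 * the buffers, as noted in tracing_set_clock() above.
 */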
5789
5790 static int tracing_clock_open(struct inode *inode, struct file *file)
5791 {
5792 struct trace_array *tr = inode->i_private;
5793 int ret;
5794
5795 if (tracing_disabled)
5796 return -ENODEV;
5797
5798 if (trace_array_get(tr))
5799 return -ENODEV;
5800
5801 ret = single_open(file, tracing_clock_show, inode->i_private);
5802 if (ret < 0)
5803 trace_array_put(tr);
5804
5805 return ret;
5806 }
5807
5808 struct ftrace_buffer_info {
5809 struct trace_iterator iter;
5810 void *spare;
5811 unsigned int read;
5812 };
5813
5814 #ifdef CONFIG_TRACER_SNAPSHOT
5815 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5816 {
5817 struct trace_array *tr = inode->i_private;
5818 struct trace_iterator *iter;
5819 struct seq_file *m;
5820 int ret = 0;
5821
5822 if (trace_array_get(tr) < 0)
5823 return -ENODEV;
5824
5825 if (file->f_mode & FMODE_READ) {
5826 iter = __tracing_open(inode, file, true);
5827 if (IS_ERR(iter))
5828 ret = PTR_ERR(iter);
5829 } else {
5830 /* Writes still need the seq_file to hold the private data */
5831 ret = -ENOMEM;
5832 m = kzalloc(sizeof(*m), GFP_KERNEL);
5833 if (!m)
5834 goto out;
5835 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5836 if (!iter) {
5837 kfree(m);
5838 goto out;
5839 }
5840 ret = 0;
5841
5842 iter->tr = tr;
5843 iter->trace_buffer = &tr->max_buffer;
5844 iter->cpu_file = tracing_get_cpu(inode);
5845 m->private = iter;
5846 file->private_data = m;
5847 }
5848 out:
5849 if (ret < 0)
5850 trace_array_put(tr);
5851
5852 return ret;
5853 }
5854
5855 static ssize_t
5856 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5857 loff_t *ppos)
5858 {
5859 struct seq_file *m = filp->private_data;
5860 struct trace_iterator *iter = m->private;
5861 struct trace_array *tr = iter->tr;
5862 unsigned long val;
5863 int ret;
5864
5865 ret = tracing_update_buffers();
5866 if (ret < 0)
5867 return ret;
5868
5869 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5870 if (ret)
5871 return ret;
5872
5873 mutex_lock(&trace_types_lock);
5874
5875 if (tr->current_trace->use_max_tr) {
5876 ret = -EBUSY;
5877 goto out;
5878 }
5879
5880 switch (val) {
5881 case 0:
5882 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5883 ret = -EINVAL;
5884 break;
5885 }
5886 if (tr->allocated_snapshot)
5887 free_snapshot(tr);
5888 break;
5889 case 1:
5890 /* Only allow per-cpu swap if the ring buffer supports it */
5891 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5892 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5893 ret = -EINVAL;
5894 break;
5895 }
5896 #endif
5897 if (!tr->allocated_snapshot) {
5898 ret = alloc_snapshot(tr);
5899 if (ret < 0)
5900 break;
5901 }
5902 local_irq_disable();
5903 /* Now, we're going to swap */
5904 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5905 update_max_tr(tr, current, smp_processor_id());
5906 else
5907 update_max_tr_single(tr, current, iter->cpu_file);
5908 local_irq_enable();
5909 break;
5910 default:
5911 if (tr->allocated_snapshot) {
5912 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5913 tracing_reset_online_cpus(&tr->max_buffer);
5914 else
5915 tracing_reset(&tr->max_buffer, iter->cpu_file);
5916 }
5917 break;
5918 }
5919
5920 if (ret >= 0) {
5921 *ppos += cnt;
5922 ret = cnt;
5923 }
5924 out:
5925 mutex_unlock(&trace_types_lock);
5926 return ret;
5927 }
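
/*
 * Summary of the values accepted by the snapshot file, per the switch in
 * tracing_snapshot_write() above:
 *
 *   echo 0 > snapshot  - free the snapshot buffer (top-level file only)
 *   echo 1 > snapshot  - allocate the buffer if needed and take a snapshot
 *   echo 2 > snapshot  - (any other value) clear the snapshot buffer
 *                        without freeing it
 */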
5928
5929 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5930 {
5931 struct seq_file *m = file->private_data;
5932 int ret;
5933
5934 ret = tracing_release(inode, file);
5935
5936 if (file->f_mode & FMODE_READ)
5937 return ret;
5938
5939 /* If write only, the seq_file is just a stub */
5940 if (m)
5941 kfree(m->private);
5942 kfree(m);
5943
5944 return 0;
5945 }
5946
5947 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5948 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5949 size_t count, loff_t *ppos);
5950 static int tracing_buffers_release(struct inode *inode, struct file *file);
5951 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5952 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5953
5954 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5955 {
5956 struct ftrace_buffer_info *info;
5957 int ret;
5958
5959 ret = tracing_buffers_open(inode, filp);
5960 if (ret < 0)
5961 return ret;
5962
5963 info = filp->private_data;
5964
5965 if (info->iter.trace->use_max_tr) {
5966 tracing_buffers_release(inode, filp);
5967 return -EBUSY;
5968 }
5969
5970 info->iter.snapshot = true;
5971 info->iter.trace_buffer = &info->iter.tr->max_buffer;
5972
5973 return ret;
5974 }
5975
5976 #endif /* CONFIG_TRACER_SNAPSHOT */
5977
5978
5979 static const struct file_operations tracing_thresh_fops = {
5980 .open = tracing_open_generic,
5981 .read = tracing_thresh_read,
5982 .write = tracing_thresh_write,
5983 .llseek = generic_file_llseek,
5984 };
5985
5986 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5987 static const struct file_operations tracing_max_lat_fops = {
5988 .open = tracing_open_generic,
5989 .read = tracing_max_lat_read,
5990 .write = tracing_max_lat_write,
5991 .llseek = generic_file_llseek,
5992 };
5993 #endif
5994
5995 static const struct file_operations set_tracer_fops = {
5996 .open = tracing_open_generic,
5997 .read = tracing_set_trace_read,
5998 .write = tracing_set_trace_write,
5999 .llseek = generic_file_llseek,
6000 };
6001
6002 static const struct file_operations tracing_pipe_fops = {
6003 .open = tracing_open_pipe,
6004 .poll = tracing_poll_pipe,
6005 .read = tracing_read_pipe,
6006 .splice_read = tracing_splice_read_pipe,
6007 .release = tracing_release_pipe,
6008 .llseek = no_llseek,
6009 };
6010
6011 static const struct file_operations tracing_entries_fops = {
6012 .open = tracing_open_generic_tr,
6013 .read = tracing_entries_read,
6014 .write = tracing_entries_write,
6015 .llseek = generic_file_llseek,
6016 .release = tracing_release_generic_tr,
6017 };
6018
6019 static const struct file_operations tracing_total_entries_fops = {
6020 .open = tracing_open_generic_tr,
6021 .read = tracing_total_entries_read,
6022 .llseek = generic_file_llseek,
6023 .release = tracing_release_generic_tr,
6024 };
6025
6026 static const struct file_operations tracing_free_buffer_fops = {
6027 .open = tracing_open_generic_tr,
6028 .write = tracing_free_buffer_write,
6029 .release = tracing_free_buffer_release,
6030 };
6031
6032 static const struct file_operations tracing_mark_fops = {
6033 .open = tracing_open_generic_tr,
6034 .write = tracing_mark_write,
6035 .llseek = generic_file_llseek,
6036 .release = tracing_release_generic_tr,
6037 };
6038
6039 static const struct file_operations trace_clock_fops = {
6040 .open = tracing_clock_open,
6041 .read = seq_read,
6042 .llseek = seq_lseek,
6043 .release = tracing_single_release_tr,
6044 .write = tracing_clock_write,
6045 };
6046
6047 #ifdef CONFIG_TRACER_SNAPSHOT
6048 static const struct file_operations snapshot_fops = {
6049 .open = tracing_snapshot_open,
6050 .read = seq_read,
6051 .write = tracing_snapshot_write,
6052 .llseek = tracing_lseek,
6053 .release = tracing_snapshot_release,
6054 };
6055
6056 static const struct file_operations snapshot_raw_fops = {
6057 .open = snapshot_raw_open,
6058 .read = tracing_buffers_read,
6059 .release = tracing_buffers_release,
6060 .splice_read = tracing_buffers_splice_read,
6061 .llseek = no_llseek,
6062 };
6063
6064 #endif /* CONFIG_TRACER_SNAPSHOT */
6065
6066 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6067 {
6068 struct trace_array *tr = inode->i_private;
6069 struct ftrace_buffer_info *info;
6070 int ret;
6071
6072 if (tracing_disabled)
6073 return -ENODEV;
6074
6075 if (trace_array_get(tr) < 0)
6076 return -ENODEV;
6077
6078 info = kzalloc(sizeof(*info), GFP_KERNEL);
6079 if (!info) {
6080 trace_array_put(tr);
6081 return -ENOMEM;
6082 }
6083
6084 mutex_lock(&trace_types_lock);
6085
6086 info->iter.tr = tr;
6087 info->iter.cpu_file = tracing_get_cpu(inode);
6088 info->iter.trace = tr->current_trace;
6089 info->iter.trace_buffer = &tr->trace_buffer;
6090 info->spare = NULL;
6091 /* Force reading ring buffer for first read */
6092 info->read = (unsigned int)-1;
6093
6094 filp->private_data = info;
6095
6096 tr->current_trace->ref++;
6097
6098 mutex_unlock(&trace_types_lock);
6099
6100 ret = nonseekable_open(inode, filp);
6101 if (ret < 0)
6102 trace_array_put(tr);
6103
6104 return ret;
6105 }
6106
6107 static unsigned int
6108 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6109 {
6110 struct ftrace_buffer_info *info = filp->private_data;
6111 struct trace_iterator *iter = &info->iter;
6112
6113 return trace_poll(iter, filp, poll_table);
6114 }
6115
6116 static ssize_t
6117 tracing_buffers_read(struct file *filp, char __user *ubuf,
6118 size_t count, loff_t *ppos)
6119 {
6120 struct ftrace_buffer_info *info = filp->private_data;
6121 struct trace_iterator *iter = &info->iter;
6122 ssize_t ret;
6123 ssize_t size;
6124
6125 if (!count)
6126 return 0;
6127
6128 #ifdef CONFIG_TRACER_MAX_TRACE
6129 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6130 return -EBUSY;
6131 #endif
6132
6133 if (!info->spare)
6134 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6135 iter->cpu_file);
6136 if (!info->spare)
6137 return -ENOMEM;
6138
6139 /* Do we have previous read data to read? */
6140 if (info->read < PAGE_SIZE)
6141 goto read;
6142
6143 again:
6144 trace_access_lock(iter->cpu_file);
6145 ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6146 &info->spare,
6147 count,
6148 iter->cpu_file, 0);
6149 trace_access_unlock(iter->cpu_file);
6150
6151 if (ret < 0) {
6152 if (trace_empty(iter)) {
6153 if ((filp->f_flags & O_NONBLOCK))
6154 return -EAGAIN;
6155
6156 ret = wait_on_pipe(iter, false);
6157 if (ret)
6158 return ret;
6159
6160 goto again;
6161 }
6162 return 0;
6163 }
6164
6165 info->read = 0;
6166 read:
6167 size = PAGE_SIZE - info->read;
6168 if (size > count)
6169 size = count;
6170
6171 ret = copy_to_user(ubuf, info->spare + info->read, size);
6172 if (ret == size)
6173 return -EFAULT;
6174
6175 size -= ret;
6176
6177 *ppos += size;
6178 info->read += size;
6179
6180 return size;
6181 }
6182
6183 static int tracing_buffers_release(struct inode *inode, struct file *file)
6184 {
6185 struct ftrace_buffer_info *info = file->private_data;
6186 struct trace_iterator *iter = &info->iter;
6187
6188 mutex_lock(&trace_types_lock);
6189
6190 iter->tr->current_trace->ref--;
6191
6192 __trace_array_put(iter->tr);
6193
6194 if (info->spare)
6195 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6196 kfree(info);
6197
6198 mutex_unlock(&trace_types_lock);
6199
6200 return 0;
6201 }
6202
6203 struct buffer_ref {
6204 struct ring_buffer *buffer;
6205 void *page;
6206 int ref;
6207 };
6208
6209 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6210 struct pipe_buffer *buf)
6211 {
6212 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6213
6214 if (--ref->ref)
6215 return;
6216
6217 ring_buffer_free_read_page(ref->buffer, ref->page);
6218 kfree(ref);
6219 buf->private = 0;
6220 }
6221
6222 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6223 struct pipe_buffer *buf)
6224 {
6225 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6226
6227 ref->ref++;
6228 }
6229
6230 /* Pipe buffer operations for a buffer. */
6231 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6232 .can_merge = 0,
6233 .confirm = generic_pipe_buf_confirm,
6234 .release = buffer_pipe_buf_release,
6235 .steal = generic_pipe_buf_steal,
6236 .get = buffer_pipe_buf_get,
6237 };
6238
6239 /*
6240  * Callback from splice_to_pipe(): release the pages left in the spd
6241  * if we errored out while filling the pipe.
6242 */
6243 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6244 {
6245 struct buffer_ref *ref =
6246 (struct buffer_ref *)spd->partial[i].private;
6247
6248 if (--ref->ref)
6249 return;
6250
6251 ring_buffer_free_read_page(ref->buffer, ref->page);
6252 kfree(ref);
6253 spd->partial[i].private = 0;
6254 }
6255
6256 static ssize_t
6257 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6258 struct pipe_inode_info *pipe, size_t len,
6259 unsigned int flags)
6260 {
6261 struct ftrace_buffer_info *info = file->private_data;
6262 struct trace_iterator *iter = &info->iter;
6263 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6264 struct page *pages_def[PIPE_DEF_BUFFERS];
6265 struct splice_pipe_desc spd = {
6266 .pages = pages_def,
6267 .partial = partial_def,
6268 .nr_pages_max = PIPE_DEF_BUFFERS,
6269 .flags = flags,
6270 .ops = &buffer_pipe_buf_ops,
6271 .spd_release = buffer_spd_release,
6272 };
6273 struct buffer_ref *ref;
6274 int entries, i;
6275 ssize_t ret = 0;
6276
6277 #ifdef CONFIG_TRACER_MAX_TRACE
6278 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6279 return -EBUSY;
6280 #endif
6281
6282 if (*ppos & (PAGE_SIZE - 1))
6283 return -EINVAL;
6284
6285 if (len & (PAGE_SIZE - 1)) {
6286 if (len < PAGE_SIZE)
6287 return -EINVAL;
6288 len &= PAGE_MASK;
6289 }
6290
6291 if (splice_grow_spd(pipe, &spd))
6292 return -ENOMEM;
6293
6294 again:
6295 trace_access_lock(iter->cpu_file);
6296 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6297
6298 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6299 struct page *page;
6300 int r;
6301
6302 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6303 if (!ref) {
6304 ret = -ENOMEM;
6305 break;
6306 }
6307
6308 ref->ref = 1;
6309 ref->buffer = iter->trace_buffer->buffer;
6310 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6311 if (!ref->page) {
6312 ret = -ENOMEM;
6313 kfree(ref);
6314 break;
6315 }
6316
6317 r = ring_buffer_read_page(ref->buffer, &ref->page,
6318 len, iter->cpu_file, 1);
6319 if (r < 0) {
6320 ring_buffer_free_read_page(ref->buffer, ref->page);
6321 kfree(ref);
6322 break;
6323 }
6324
6325 page = virt_to_page(ref->page);
6326
6327 spd.pages[i] = page;
6328 spd.partial[i].len = PAGE_SIZE;
6329 spd.partial[i].offset = 0;
6330 spd.partial[i].private = (unsigned long)ref;
6331 spd.nr_pages++;
6332 *ppos += PAGE_SIZE;
6333
6334 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6335 }
6336
6337 trace_access_unlock(iter->cpu_file);
6338 spd.nr_pages = i;
6339
6340 /* did we read anything? */
6341 if (!spd.nr_pages) {
6342 if (ret)
6343 goto out;
6344
6345 ret = -EAGAIN;
6346 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6347 goto out;
6348
6349 ret = wait_on_pipe(iter, true);
6350 if (ret)
6351 goto out;
6352
6353 goto again;
6354 }
6355
6356 ret = splice_to_pipe(pipe, &spd);
6357 out:
6358 splice_shrink_spd(&spd);
6359
6360 return ret;
6361 }
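
/*
 * This splice path is the zero-copy interface that tools such as trace-cmd
 * typically use: splicing per_cpu/cpuN/trace_pipe_raw into a file or socket
 * hands complete ring-buffer pages to the pipe without copying individual
 * events.
 */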
6362
6363 static const struct file_operations tracing_buffers_fops = {
6364 .open = tracing_buffers_open,
6365 .read = tracing_buffers_read,
6366 .poll = tracing_buffers_poll,
6367 .release = tracing_buffers_release,
6368 .splice_read = tracing_buffers_splice_read,
6369 .llseek = no_llseek,
6370 };
6371
6372 static ssize_t
6373 tracing_stats_read(struct file *filp, char __user *ubuf,
6374 size_t count, loff_t *ppos)
6375 {
6376 struct inode *inode = file_inode(filp);
6377 struct trace_array *tr = inode->i_private;
6378 struct trace_buffer *trace_buf = &tr->trace_buffer;
6379 int cpu = tracing_get_cpu(inode);
6380 struct trace_seq *s;
6381 unsigned long cnt;
6382 unsigned long long t;
6383 unsigned long usec_rem;
6384
6385 s = kmalloc(sizeof(*s), GFP_KERNEL);
6386 if (!s)
6387 return -ENOMEM;
6388
6389 trace_seq_init(s);
6390
6391 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6392 trace_seq_printf(s, "entries: %ld\n", cnt);
6393
6394 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6395 trace_seq_printf(s, "overrun: %ld\n", cnt);
6396
6397 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6398 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6399
6400 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6401 trace_seq_printf(s, "bytes: %ld\n", cnt);
6402
6403 if (trace_clocks[tr->clock_id].in_ns) {
6404 /* local or global for trace_clock */
6405 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6406 usec_rem = do_div(t, USEC_PER_SEC);
6407 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6408 t, usec_rem);
6409
6410 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6411 usec_rem = do_div(t, USEC_PER_SEC);
6412 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6413 } else {
6414 /* counter or tsc mode for trace_clock */
6415 trace_seq_printf(s, "oldest event ts: %llu\n",
6416 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6417
6418 trace_seq_printf(s, "now ts: %llu\n",
6419 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6420 }
6421
6422 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6423 trace_seq_printf(s, "dropped events: %ld\n", cnt);
6424
6425 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6426 trace_seq_printf(s, "read events: %ld\n", cnt);
6427
6428 count = simple_read_from_buffer(ubuf, count, ppos,
6429 s->buffer, trace_seq_used(s));
6430
6431 kfree(s);
6432
6433 return count;
6434 }
6435
6436 static const struct file_operations tracing_stats_fops = {
6437 .open = tracing_open_generic_tr,
6438 .read = tracing_stats_read,
6439 .llseek = generic_file_llseek,
6440 .release = tracing_release_generic_tr,
6441 };
6442
6443 #ifdef CONFIG_DYNAMIC_FTRACE
6444
6445 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6446 {
6447 return 0;
6448 }
6449
6450 static ssize_t
6451 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6452 size_t cnt, loff_t *ppos)
6453 {
6454 static char ftrace_dyn_info_buffer[1024];
6455 static DEFINE_MUTEX(dyn_info_mutex);
6456 unsigned long *p = filp->private_data;
6457 char *buf = ftrace_dyn_info_buffer;
6458 int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6459 int r;
6460
6461 mutex_lock(&dyn_info_mutex);
6462 r = sprintf(buf, "%ld ", *p);
6463
6464 r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6465 buf[r++] = '\n';
6466
6467 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6468
6469 mutex_unlock(&dyn_info_mutex);
6470
6471 return r;
6472 }
6473
6474 static const struct file_operations tracing_dyn_info_fops = {
6475 .open = tracing_open_generic,
6476 .read = tracing_read_dyn_info,
6477 .llseek = generic_file_llseek,
6478 };
6479 #endif /* CONFIG_DYNAMIC_FTRACE */
6480
6481 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6482 static void
6483 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6484 {
6485 tracing_snapshot();
6486 }
6487
6488 static void
6489 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6490 {
6491 unsigned long *count = (long *)data;
6492
6493 if (!*count)
6494 return;
6495
6496 if (*count != -1)
6497 (*count)--;
6498
6499 tracing_snapshot();
6500 }
6501
6502 static int
6503 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6504 struct ftrace_probe_ops *ops, void *data)
6505 {
6506 long count = (long)data;
6507
6508 seq_printf(m, "%ps:", (void *)ip);
6509
6510 seq_puts(m, "snapshot");
6511
6512 if (count == -1)
6513 seq_puts(m, ":unlimited\n");
6514 else
6515 seq_printf(m, ":count=%ld\n", count);
6516
6517 return 0;
6518 }
6519
6520 static struct ftrace_probe_ops snapshot_probe_ops = {
6521 .func = ftrace_snapshot,
6522 .print = ftrace_snapshot_print,
6523 };
6524
6525 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6526 .func = ftrace_count_snapshot,
6527 .print = ftrace_snapshot_print,
6528 };
6529
6530 static int
6531 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6532 char *glob, char *cmd, char *param, int enable)
6533 {
6534 struct ftrace_probe_ops *ops;
6535 void *count = (void *)-1;
6536 char *number;
6537 int ret;
6538
6539 /* hash funcs only work with set_ftrace_filter */
6540 if (!enable)
6541 return -EINVAL;
6542
6543 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6544
6545 if (glob[0] == '!') {
6546 unregister_ftrace_function_probe_func(glob+1, ops);
6547 return 0;
6548 }
6549
6550 if (!param)
6551 goto out_reg;
6552
6553 	number = strsep(&param, ":");
6554
6555 if (!strlen(number))
6556 goto out_reg;
6557
6558 /*
6559 * We use the callback data field (which is a pointer)
6560 * as our counter.
6561 */
6562 ret = kstrtoul(number, 0, (unsigned long *)&count);
6563 if (ret)
6564 return ret;
6565
6566 out_reg:
6567 ret = alloc_snapshot(&global_trace);
6568 if (ret < 0)
6569 goto out;
6570
6571 ret = register_ftrace_function_probe(glob, ops, count);
6572
6573 out:
6574 return ret < 0 ? ret : 0;
6575 }
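
/*
 * The callback above implements the "snapshot" ftrace command registered
 * below. For example (the function name is only illustrative):
 *
 *   # echo 'schedule:snapshot:5' > set_ftrace_filter
 *
 * takes a snapshot on each of the first five hits of schedule(); omitting
 * the count gives an unlimited trigger.
 */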
6576
6577 static struct ftrace_func_command ftrace_snapshot_cmd = {
6578 .name = "snapshot",
6579 .func = ftrace_trace_snapshot_callback,
6580 };
6581
6582 static __init int register_snapshot_cmd(void)
6583 {
6584 return register_ftrace_command(&ftrace_snapshot_cmd);
6585 }
6586 #else
6587 static inline __init int register_snapshot_cmd(void) { return 0; }
6588 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6589
6590 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6591 {
6592 if (WARN_ON(!tr->dir))
6593 return ERR_PTR(-ENODEV);
6594
6595 /* Top directory uses NULL as the parent */
6596 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6597 return NULL;
6598
6599 /* All sub buffers have a descriptor */
6600 return tr->dir;
6601 }
6602
6603 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6604 {
6605 struct dentry *d_tracer;
6606
6607 if (tr->percpu_dir)
6608 return tr->percpu_dir;
6609
6610 d_tracer = tracing_get_dentry(tr);
6611 if (IS_ERR(d_tracer))
6612 return NULL;
6613
6614 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6615
6616 WARN_ONCE(!tr->percpu_dir,
6617 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6618
6619 return tr->percpu_dir;
6620 }
6621
6622 static struct dentry *
6623 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6624 void *data, long cpu, const struct file_operations *fops)
6625 {
6626 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6627
6628 if (ret) /* See tracing_get_cpu() */
6629 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6630 return ret;
6631 }
6632
6633 static void
6634 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6635 {
6636 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6637 struct dentry *d_cpu;
6638 char cpu_dir[30]; /* 30 characters should be more than enough */
6639
6640 if (!d_percpu)
6641 return;
6642
6643 snprintf(cpu_dir, 30, "cpu%ld", cpu);
6644 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6645 if (!d_cpu) {
6646 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6647 return;
6648 }
6649
6650 /* per cpu trace_pipe */
6651 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6652 tr, cpu, &tracing_pipe_fops);
6653
6654 /* per cpu trace */
6655 trace_create_cpu_file("trace", 0644, d_cpu,
6656 tr, cpu, &tracing_fops);
6657
6658 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6659 tr, cpu, &tracing_buffers_fops);
6660
6661 trace_create_cpu_file("stats", 0444, d_cpu,
6662 tr, cpu, &tracing_stats_fops);
6663
6664 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6665 tr, cpu, &tracing_entries_fops);
6666
6667 #ifdef CONFIG_TRACER_SNAPSHOT
6668 trace_create_cpu_file("snapshot", 0644, d_cpu,
6669 tr, cpu, &snapshot_fops);
6670
6671 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6672 tr, cpu, &snapshot_raw_fops);
6673 #endif
6674 }
6675
6676 #ifdef CONFIG_FTRACE_SELFTEST
6677 /* Let selftest have access to static functions in this file */
6678 #include "trace_selftest.c"
6679 #endif
6680
6681 static ssize_t
6682 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6683 loff_t *ppos)
6684 {
6685 struct trace_option_dentry *topt = filp->private_data;
6686 char *buf;
6687
6688 if (topt->flags->val & topt->opt->bit)
6689 buf = "1\n";
6690 else
6691 buf = "0\n";
6692
6693 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6694 }
6695
6696 static ssize_t
6697 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6698 loff_t *ppos)
6699 {
6700 struct trace_option_dentry *topt = filp->private_data;
6701 unsigned long val;
6702 int ret;
6703
6704 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6705 if (ret)
6706 return ret;
6707
6708 if (val != 0 && val != 1)
6709 return -EINVAL;
6710
6711 if (!!(topt->flags->val & topt->opt->bit) != val) {
6712 mutex_lock(&trace_types_lock);
6713 ret = __set_tracer_option(topt->tr, topt->flags,
6714 topt->opt, !val);
6715 mutex_unlock(&trace_types_lock);
6716 if (ret)
6717 return ret;
6718 }
6719
6720 *ppos += cnt;
6721
6722 return cnt;
6723 }
6724
6725
6726 static const struct file_operations trace_options_fops = {
6727 .open = tracing_open_generic,
6728 .read = trace_options_read,
6729 .write = trace_options_write,
6730 .llseek = generic_file_llseek,
6731 };
6732
6733 /*
6734 * In order to pass in both the trace_array descriptor as well as the index
6735 * to the flag that the trace option file represents, the trace_array
6736 * has a character array of trace_flags_index[], which holds the index
6737 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6738 * The address of this character array is passed to the flag option file
6739 * read/write callbacks.
6740 *
6741 * In order to extract both the index and the trace_array descriptor,
6742 * get_tr_index() uses the following algorithm.
6743 *
6744 * idx = *ptr;
6745 *
6746  * This works because the pointer holds the address of an element of
6747  * the index array, and that element's value is its own index
6748  * (remember index[1] == 1).
6749  *
6750  * Subtracting that index from the pointer then yields the start of the index array:
6751 *
6752 * ptr - idx == &index[0]
6753 *
6754 * Then a simple container_of() from that pointer gets us to the
6755 * trace_array descriptor.
6756 */
6757 static void get_tr_index(void *data, struct trace_array **ptr,
6758 unsigned int *pindex)
6759 {
6760 *pindex = *(unsigned char *)data;
6761
6762 *ptr = container_of(data - *pindex, struct trace_array,
6763 trace_flags_index);
6764 }
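
/*
 * Worked example of the scheme described above: if data points at
 * tr->trace_flags_index[3], then *data == 3 and data - 3 is
 * &tr->trace_flags_index[0], from which container_of() recovers tr.
 */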
6765
6766 static ssize_t
6767 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6768 loff_t *ppos)
6769 {
6770 void *tr_index = filp->private_data;
6771 struct trace_array *tr;
6772 unsigned int index;
6773 char *buf;
6774
6775 get_tr_index(tr_index, &tr, &index);
6776
6777 if (tr->trace_flags & (1 << index))
6778 buf = "1\n";
6779 else
6780 buf = "0\n";
6781
6782 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6783 }
6784
6785 static ssize_t
6786 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6787 loff_t *ppos)
6788 {
6789 void *tr_index = filp->private_data;
6790 struct trace_array *tr;
6791 unsigned int index;
6792 unsigned long val;
6793 int ret;
6794
6795 get_tr_index(tr_index, &tr, &index);
6796
6797 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6798 if (ret)
6799 return ret;
6800
6801 if (val != 0 && val != 1)
6802 return -EINVAL;
6803
6804 mutex_lock(&trace_types_lock);
6805 ret = set_tracer_flag(tr, 1 << index, val);
6806 mutex_unlock(&trace_types_lock);
6807
6808 if (ret < 0)
6809 return ret;
6810
6811 *ppos += cnt;
6812
6813 return cnt;
6814 }
6815
6816 static const struct file_operations trace_options_core_fops = {
6817 .open = tracing_open_generic,
6818 .read = trace_options_core_read,
6819 .write = trace_options_core_write,
6820 .llseek = generic_file_llseek,
6821 };
6822
6823 struct dentry *trace_create_file(const char *name,
6824 umode_t mode,
6825 struct dentry *parent,
6826 void *data,
6827 const struct file_operations *fops)
6828 {
6829 struct dentry *ret;
6830
6831 ret = tracefs_create_file(name, mode, parent, data, fops);
6832 if (!ret)
6833 pr_warn("Could not create tracefs '%s' entry\n", name);
6834
6835 return ret;
6836 }
6837
6838
6839 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6840 {
6841 struct dentry *d_tracer;
6842
6843 if (tr->options)
6844 return tr->options;
6845
6846 d_tracer = tracing_get_dentry(tr);
6847 if (IS_ERR(d_tracer))
6848 return NULL;
6849
6850 tr->options = tracefs_create_dir("options", d_tracer);
6851 if (!tr->options) {
6852 pr_warn("Could not create tracefs directory 'options'\n");
6853 return NULL;
6854 }
6855
6856 return tr->options;
6857 }
6858
6859 static void
6860 create_trace_option_file(struct trace_array *tr,
6861 struct trace_option_dentry *topt,
6862 struct tracer_flags *flags,
6863 struct tracer_opt *opt)
6864 {
6865 struct dentry *t_options;
6866
6867 t_options = trace_options_init_dentry(tr);
6868 if (!t_options)
6869 return;
6870
6871 topt->flags = flags;
6872 topt->opt = opt;
6873 topt->tr = tr;
6874
6875 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6876 &trace_options_fops);
6877
6878 }
6879
6880 static void
6881 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6882 {
6883 struct trace_option_dentry *topts;
6884 struct trace_options *tr_topts;
6885 struct tracer_flags *flags;
6886 struct tracer_opt *opts;
6887 int cnt;
6888 int i;
6889
6890 if (!tracer)
6891 return;
6892
6893 flags = tracer->flags;
6894
6895 if (!flags || !flags->opts)
6896 return;
6897
6898 /*
6899 * If this is an instance, only create flags for tracers
6900 * the instance may have.
6901 */
6902 if (!trace_ok_for_array(tracer, tr))
6903 return;
6904
6905 for (i = 0; i < tr->nr_topts; i++) {
6906 		/* Make sure there are no duplicate flags. */
6907 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6908 return;
6909 }
6910
6911 opts = flags->opts;
6912
6913 for (cnt = 0; opts[cnt].name; cnt++)
6914 ;
6915
6916 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6917 if (!topts)
6918 return;
6919
6920 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6921 GFP_KERNEL);
6922 if (!tr_topts) {
6923 kfree(topts);
6924 return;
6925 }
6926
6927 tr->topts = tr_topts;
6928 tr->topts[tr->nr_topts].tracer = tracer;
6929 tr->topts[tr->nr_topts].topts = topts;
6930 tr->nr_topts++;
6931
6932 for (cnt = 0; opts[cnt].name; cnt++) {
6933 create_trace_option_file(tr, &topts[cnt], flags,
6934 &opts[cnt]);
6935 WARN_ONCE(topts[cnt].entry == NULL,
6936 "Failed to create trace option: %s",
6937 opts[cnt].name);
6938 }
6939 }
6940
6941 static struct dentry *
6942 create_trace_option_core_file(struct trace_array *tr,
6943 const char *option, long index)
6944 {
6945 struct dentry *t_options;
6946
6947 t_options = trace_options_init_dentry(tr);
6948 if (!t_options)
6949 return NULL;
6950
6951 return trace_create_file(option, 0644, t_options,
6952 (void *)&tr->trace_flags_index[index],
6953 &trace_options_core_fops);
6954 }
6955
6956 static void create_trace_options_dir(struct trace_array *tr)
6957 {
6958 struct dentry *t_options;
6959 bool top_level = tr == &global_trace;
6960 int i;
6961
6962 t_options = trace_options_init_dentry(tr);
6963 if (!t_options)
6964 return;
6965
6966 for (i = 0; trace_options[i]; i++) {
6967 if (top_level ||
6968 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6969 create_trace_option_core_file(tr, trace_options[i], i);
6970 }
6971 }
6972
6973 static ssize_t
6974 rb_simple_read(struct file *filp, char __user *ubuf,
6975 size_t cnt, loff_t *ppos)
6976 {
6977 struct trace_array *tr = filp->private_data;
6978 char buf[64];
6979 int r;
6980
6981 r = tracer_tracing_is_on(tr);
6982 r = sprintf(buf, "%d\n", r);
6983
6984 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6985 }
6986
6987 static ssize_t
6988 rb_simple_write(struct file *filp, const char __user *ubuf,
6989 size_t cnt, loff_t *ppos)
6990 {
6991 struct trace_array *tr = filp->private_data;
6992 struct ring_buffer *buffer = tr->trace_buffer.buffer;
6993 unsigned long val;
6994 int ret;
6995
6996 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6997 if (ret)
6998 return ret;
6999
7000 if (buffer) {
7001 mutex_lock(&trace_types_lock);
7002 if (val) {
7003 tracer_tracing_on(tr);
7004 if (tr->current_trace->start)
7005 tr->current_trace->start(tr);
7006 } else {
7007 tracer_tracing_off(tr);
7008 if (tr->current_trace->stop)
7009 tr->current_trace->stop(tr);
7010 }
7011 mutex_unlock(&trace_types_lock);
7012 }
7013
7014 (*ppos)++;
7015
7016 return cnt;
7017 }
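
/*
 * Usage note for the tracing_on file backed by rb_simple_write() above:
 *
 *   # echo 0 > /sys/kernel/tracing/tracing_on
 *
 * stops recording into the ring buffer (and calls the tracer's stop()
 * callback) without tearing the tracer down; "echo 1" re-enables it.
 */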
7018
7019 static const struct file_operations rb_simple_fops = {
7020 .open = tracing_open_generic_tr,
7021 .read = rb_simple_read,
7022 .write = rb_simple_write,
7023 .release = tracing_release_generic_tr,
7024 .llseek = default_llseek,
7025 };
7026
7027 struct dentry *trace_instance_dir;
7028
7029 static void
7030 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7031
7032 static int
7033 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7034 {
7035 enum ring_buffer_flags rb_flags;
7036
7037 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7038
7039 buf->tr = tr;
7040
7041 buf->buffer = ring_buffer_alloc(size, rb_flags);
7042 if (!buf->buffer)
7043 return -ENOMEM;
7044
7045 buf->data = alloc_percpu(struct trace_array_cpu);
7046 if (!buf->data) {
7047 ring_buffer_free(buf->buffer);
7048 buf->buffer = NULL;
7049 return -ENOMEM;
7050 }
7051
7052 /* Allocate the first page for all buffers */
7053 set_buffer_entries(&tr->trace_buffer,
7054 ring_buffer_size(tr->trace_buffer.buffer, 0));
7055
7056 return 0;
7057 }
7058
7059 static int allocate_trace_buffers(struct trace_array *tr, int size)
7060 {
7061 int ret;
7062
7063 ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7064 if (ret)
7065 return ret;
7066
7067 #ifdef CONFIG_TRACER_MAX_TRACE
7068 ret = allocate_trace_buffer(tr, &tr->max_buffer,
7069 allocate_snapshot ? size : 1);
7070 if (WARN_ON(ret)) {
7071 ring_buffer_free(tr->trace_buffer.buffer);
7072 tr->trace_buffer.buffer = NULL;
7073 free_percpu(tr->trace_buffer.data);
7074 tr->trace_buffer.data = NULL;
7075 return -ENOMEM;
7076 }
7077 tr->allocated_snapshot = allocate_snapshot;
7078
7079 /*
7080 * Only the top level trace array gets its snapshot allocated
7081 * from the kernel command line.
7082 */
7083 allocate_snapshot = false;
7084 #endif
7085 return 0;
7086 }
7087
7088 static void free_trace_buffer(struct trace_buffer *buf)
7089 {
7090 if (buf->buffer) {
7091 ring_buffer_free(buf->buffer);
7092 buf->buffer = NULL;
7093 free_percpu(buf->data);
7094 buf->data = NULL;
7095 }
7096 }
7097
7098 static void free_trace_buffers(struct trace_array *tr)
7099 {
7100 if (!tr)
7101 return;
7102
7103 free_trace_buffer(&tr->trace_buffer);
7104
7105 #ifdef CONFIG_TRACER_MAX_TRACE
7106 free_trace_buffer(&tr->max_buffer);
7107 #endif
7108 }
7109
7110 static void init_trace_flags_index(struct trace_array *tr)
7111 {
7112 int i;
7113
7114 /* Used by the trace options files */
7115 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7116 tr->trace_flags_index[i] = i;
7117 }
7118
7119 static void __update_tracer_options(struct trace_array *tr)
7120 {
7121 struct tracer *t;
7122
7123 for (t = trace_types; t; t = t->next)
7124 add_tracer_options(tr, t);
7125 }
7126
7127 static void update_tracer_options(struct trace_array *tr)
7128 {
7129 mutex_lock(&trace_types_lock);
7130 __update_tracer_options(tr);
7131 mutex_unlock(&trace_types_lock);
7132 }
7133
7134 static int instance_mkdir(const char *name)
7135 {
7136 struct trace_array *tr;
7137 int ret;
7138
7139 mutex_lock(&trace_types_lock);
7140
7141 ret = -EEXIST;
7142 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7143 if (tr->name && strcmp(tr->name, name) == 0)
7144 goto out_unlock;
7145 }
7146
7147 ret = -ENOMEM;
7148 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7149 if (!tr)
7150 goto out_unlock;
7151
7152 tr->name = kstrdup(name, GFP_KERNEL);
7153 if (!tr->name)
7154 goto out_free_tr;
7155
7156 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7157 goto out_free_tr;
7158
7159 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7160
7161 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7162
7163 raw_spin_lock_init(&tr->start_lock);
7164
7165 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7166
7167 tr->current_trace = &nop_trace;
7168
7169 INIT_LIST_HEAD(&tr->systems);
7170 INIT_LIST_HEAD(&tr->events);
7171
7172 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7173 goto out_free_tr;
7174
7175 tr->dir = tracefs_create_dir(name, trace_instance_dir);
7176 if (!tr->dir)
7177 goto out_free_tr;
7178
7179 ret = event_trace_add_tracer(tr->dir, tr);
7180 if (ret) {
7181 tracefs_remove_recursive(tr->dir);
7182 goto out_free_tr;
7183 }
7184
7185 init_tracer_tracefs(tr, tr->dir);
7186 init_trace_flags_index(tr);
7187 __update_tracer_options(tr);
7188
7189 list_add(&tr->list, &ftrace_trace_arrays);
7190
7191 mutex_unlock(&trace_types_lock);
7192
7193 return 0;
7194
7195 out_free_tr:
7196 free_trace_buffers(tr);
7197 free_cpumask_var(tr->tracing_cpumask);
7198 kfree(tr->name);
7199 kfree(tr);
7200
7201 out_unlock:
7202 mutex_unlock(&trace_types_lock);
7203
7204 return ret;
7205
7206 }
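
/*
 * Instances are created and removed with ordinary mkdir/rmdir, e.g.
 * (the instance name is only illustrative):
 *
 *   # mkdir /sys/kernel/tracing/instances/foo
 *   # rmdir /sys/kernel/tracing/instances/foo
 *
 * Each instance gets its own buffers, events and most of the files set up
 * by init_tracer_tracefs().
 */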
7207
7208 static int instance_rmdir(const char *name)
7209 {
7210 struct trace_array *tr;
7211 int found = 0;
7212 int ret;
7213 int i;
7214
7215 mutex_lock(&trace_types_lock);
7216
7217 ret = -ENODEV;
7218 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7219 if (tr->name && strcmp(tr->name, name) == 0) {
7220 found = 1;
7221 break;
7222 }
7223 }
7224 if (!found)
7225 goto out_unlock;
7226
7227 ret = -EBUSY;
7228 if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7229 goto out_unlock;
7230
7231 list_del(&tr->list);
7232
7233 /* Disable all the flags that were enabled coming in */
7234 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7235 if ((1 << i) & ZEROED_TRACE_FLAGS)
7236 set_tracer_flag(tr, 1 << i, 0);
7237 }
7238
7239 tracing_set_nop(tr);
7240 event_trace_del_tracer(tr);
7241 ftrace_clear_pids(tr);
7242 ftrace_destroy_function_files(tr);
7243 tracefs_remove_recursive(tr->dir);
7244 free_trace_buffers(tr);
7245
7246 for (i = 0; i < tr->nr_topts; i++) {
7247 kfree(tr->topts[i].topts);
7248 }
7249 kfree(tr->topts);
7250
7251 free_cpumask_var(tr->tracing_cpumask);
7252 kfree(tr->name);
7253 kfree(tr);
7254
7255 ret = 0;
7256
7257 out_unlock:
7258 mutex_unlock(&trace_types_lock);
7259
7260 return ret;
7261 }
7262
7263 static __init void create_trace_instances(struct dentry *d_tracer)
7264 {
7265 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7266 instance_mkdir,
7267 instance_rmdir);
7268 if (WARN_ON(!trace_instance_dir))
7269 return;
7270 }
7271
7272 static void
7273 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7274 {
7275 int cpu;
7276
7277 trace_create_file("available_tracers", 0444, d_tracer,
7278 tr, &show_traces_fops);
7279
7280 trace_create_file("current_tracer", 0644, d_tracer,
7281 tr, &set_tracer_fops);
7282
7283 trace_create_file("tracing_cpumask", 0644, d_tracer,
7284 tr, &tracing_cpumask_fops);
7285
7286 trace_create_file("trace_options", 0644, d_tracer,
7287 tr, &tracing_iter_fops);
7288
7289 trace_create_file("trace", 0644, d_tracer,
7290 tr, &tracing_fops);
7291
7292 trace_create_file("trace_pipe", 0444, d_tracer,
7293 tr, &tracing_pipe_fops);
7294
7295 trace_create_file("buffer_size_kb", 0644, d_tracer,
7296 tr, &tracing_entries_fops);
7297
7298 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7299 tr, &tracing_total_entries_fops);
7300
7301 trace_create_file("free_buffer", 0200, d_tracer,
7302 tr, &tracing_free_buffer_fops);
7303
7304 trace_create_file("trace_marker", 0220, d_tracer,
7305 tr, &tracing_mark_fops);
7306
7307 trace_create_file("saved_tgids", 0444, d_tracer,
7308 tr, &tracing_saved_tgids_fops);
7309
7310 trace_create_file("trace_clock", 0644, d_tracer, tr,
7311 &trace_clock_fops);
7312
7313 trace_create_file("tracing_on", 0644, d_tracer,
7314 tr, &rb_simple_fops);
7315
7316 create_trace_options_dir(tr);
7317
7318 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7319 trace_create_file("tracing_max_latency", 0644, d_tracer,
7320 &tr->max_latency, &tracing_max_lat_fops);
7321 #endif
7322
7323 if (ftrace_create_function_files(tr, d_tracer))
7324 WARN(1, "Could not allocate function filter files");
7325
7326 #ifdef CONFIG_TRACER_SNAPSHOT
7327 trace_create_file("snapshot", 0644, d_tracer,
7328 tr, &snapshot_fops);
7329 #endif
7330
7331 for_each_tracing_cpu(cpu)
7332 tracing_init_tracefs_percpu(tr, cpu);
7333
7334 ftrace_init_tracefs(tr, d_tracer);
7335 }
7336
7337 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
7338 {
7339 struct vfsmount *mnt;
7340 struct file_system_type *type;
7341
7342 /*
7343 * To maintain backward compatibility for tools that mount
7344 * debugfs to get to the tracing facility, tracefs is automatically
7345 * mounted to the debugfs/tracing directory.
7346 */
7347 type = get_fs_type("tracefs");
7348 if (!type)
7349 return NULL;
7350 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7351 put_filesystem(type);
7352 if (IS_ERR(mnt))
7353 return NULL;
7354 mntget(mnt);
7355
7356 return mnt;
7357 }
7358
7359 /**
7360 * tracing_init_dentry - initialize top level trace array
7361 *
7362 * This is called when creating files or directories in the tracing
7363 * directory. It is called via fs_initcall() by any of the boot up code
7364 * and expects to return the dentry of the top level tracing directory.
7365 */
7366 struct dentry *tracing_init_dentry(void)
7367 {
7368 struct trace_array *tr = &global_trace;
7369
7370 /* The top level trace array uses NULL as parent */
7371 if (tr->dir)
7372 return NULL;
7373
7374 if (WARN_ON(!tracefs_initialized()) ||
7375 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7376 WARN_ON(!debugfs_initialized())))
7377 return ERR_PTR(-ENODEV);
7378
7379 /*
7380 * As there may still be users that expect the tracing
7381 * files to exist in debugfs/tracing, we must automount
7382 * the tracefs file system there, so older tools still
7383 	 * work with the newer kernel.
7384 */
7385 tr->dir = debugfs_create_automount("tracing", NULL,
7386 trace_automount, NULL);
7387 if (!tr->dir) {
7388 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7389 return ERR_PTR(-ENOMEM);
7390 }
7391
7392 return NULL;
7393 }
7394
7395 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7396 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7397
7398 static void __init trace_enum_init(void)
7399 {
7400 int len;
7401
7402 len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7403 trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7404 }
7405
7406 #ifdef CONFIG_MODULES
7407 static void trace_module_add_enums(struct module *mod)
7408 {
7409 if (!mod->num_trace_enums)
7410 return;
7411
7412 /*
7413 	 * Modules with bad taint do not have events created, so do
7414 	 * not bother with enums either.
7415 */
7416 if (trace_module_has_bad_taint(mod))
7417 return;
7418
7419 trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7420 }
7421
7422 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7423 static void trace_module_remove_enums(struct module *mod)
7424 {
7425 union trace_enum_map_item *map;
7426 union trace_enum_map_item **last = &trace_enum_maps;
7427
7428 if (!mod->num_trace_enums)
7429 return;
7430
7431 mutex_lock(&trace_enum_mutex);
7432
7433 map = trace_enum_maps;
7434
7435 while (map) {
7436 if (map->head.mod == mod)
7437 break;
7438 map = trace_enum_jmp_to_tail(map);
7439 last = &map->tail.next;
7440 map = map->tail.next;
7441 }
7442 if (!map)
7443 goto out;
7444
7445 *last = trace_enum_jmp_to_tail(map)->tail.next;
7446 kfree(map);
7447 out:
7448 mutex_unlock(&trace_enum_mutex);
7449 }
7450 #else
7451 static inline void trace_module_remove_enums(struct module *mod) { }
7452 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7453
7454 static int trace_module_notify(struct notifier_block *self,
7455 unsigned long val, void *data)
7456 {
7457 struct module *mod = data;
7458
7459 switch (val) {
7460 case MODULE_STATE_COMING:
7461 trace_module_add_enums(mod);
7462 break;
7463 case MODULE_STATE_GOING:
7464 trace_module_remove_enums(mod);
7465 break;
7466 }
7467
7468 return 0;
7469 }
7470
7471 static struct notifier_block trace_module_nb = {
7472 .notifier_call = trace_module_notify,
7473 .priority = 0,
7474 };
7475 #endif /* CONFIG_MODULES */
7476
7477 static __init int tracer_init_tracefs(void)
7478 {
7479 struct dentry *d_tracer;
7480
7481 trace_access_lock_init();
7482
7483 d_tracer = tracing_init_dentry();
7484 if (IS_ERR(d_tracer))
7485 return 0;
7486
7487 init_tracer_tracefs(&global_trace, d_tracer);
7488 ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7489
7490 trace_create_file("tracing_thresh", 0644, d_tracer,
7491 &global_trace, &tracing_thresh_fops);
7492
7493 trace_create_file("README", 0444, d_tracer,
7494 NULL, &tracing_readme_fops);
7495
7496 trace_create_file("saved_cmdlines", 0444, d_tracer,
7497 NULL, &tracing_saved_cmdlines_fops);
7498
7499 trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7500 NULL, &tracing_saved_cmdlines_size_fops);
7501
7502 trace_enum_init();
7503
7504 trace_create_enum_file(d_tracer);
7505
7506 #ifdef CONFIG_MODULES
7507 register_module_notifier(&trace_module_nb);
7508 #endif
7509
7510 #ifdef CONFIG_DYNAMIC_FTRACE
7511 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7512 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7513 #endif
7514
7515 create_trace_instances(d_tracer);
7516
7517 update_tracer_options(&global_trace);
7518
7519 return 0;
7520 }
7521
7522 static int trace_panic_handler(struct notifier_block *this,
7523 unsigned long event, void *unused)
7524 {
7525 if (ftrace_dump_on_oops)
7526 ftrace_dump(ftrace_dump_on_oops);
7527 return NOTIFY_OK;
7528 }
7529
7530 static struct notifier_block trace_panic_notifier = {
7531 .notifier_call = trace_panic_handler,
7532 .next = NULL,
7533 .priority = 150 /* priority: INT_MAX >= x >= 0 */
7534 };
7535
7536 static int trace_die_handler(struct notifier_block *self,
7537 unsigned long val,
7538 void *data)
7539 {
7540 switch (val) {
7541 case DIE_OOPS:
7542 if (ftrace_dump_on_oops)
7543 ftrace_dump(ftrace_dump_on_oops);
7544 break;
7545 default:
7546 break;
7547 }
7548 return NOTIFY_OK;
7549 }
7550
7551 static struct notifier_block trace_die_notifier = {
7552 .notifier_call = trace_die_handler,
7553 .priority = 200
7554 };
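
/*
 * Note (hedged): the value of ftrace_dump_on_oops doubles as the dump
 * mode handed to ftrace_dump() above.  Booting with
 * "ftrace_dump_on_oops=orig_cpu", for instance, maps to DUMP_ORIG so
 * only the buffer of the CPU that oopsed is printed.
 */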

/*
 * printk() is limited to a max of 1024 characters; we really don't need
 * it that big.  Nothing should be printing 1000 characters anyway.
 */
#define TRACE_MAX_PRINT		1000

/*
 * Define here KERN_TRACE so that we have one place to modify
 * it if we decide to change what log level the ftrace dump
 * should be at.
 */
#define KERN_TRACE		KERN_EMERG

void
trace_printk_seq(struct trace_seq *s)
{
	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code.  Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* Should be NUL terminated, but we are paranoid. */
	s->buffer[s->seq.len] = 0;

	printk(KERN_TRACE "%s", s->buffer);

	trace_seq_init(s);
}

void trace_init_global_iter(struct trace_iterator *iter)
{
	iter->tr = &global_trace;
	iter->trace = iter->tr->current_trace;
	iter->cpu_file = RING_BUFFER_ALL_CPUS;
	iter->trace_buffer = &global_trace.trace_buffer;

	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->trace_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
}

void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	static atomic_t dump_running;
	struct trace_array *tr = &global_trace;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0, cpu;

	/* Only allow one dump user at a time. */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracing_off();

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_global_iter(&iter);

	for_each_tracing_cpu(cpu) {
		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
	}

	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;

	switch (oops_dump_mode) {
	case DUMP_ALL:
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
		break;
	case DUMP_ORIG:
		iter.cpu_file = raw_smp_processor_id();
		break;
	case DUMP_NONE:
		goto out_enable;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
	}

	printk(KERN_TRACE "Dumping ftrace buffer:\n");

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("# MAY BE MISSING FUNCTION EVENTS\n");
	}
	/*
	 * We need to stop all tracing on all CPUs to read
	 * the next buffer.  This is a bit expensive, but is
	 * not done often.  We fill in all that we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {

		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		/* reset all but tr, trace, and overruns */
		memset(&iter.seq, 0,
		       sizeof(struct trace_iterator) -
		       offsetof(struct trace_iterator, seq));
		iter.iter_flags |= TRACE_FILE_LAT_FMT;
		iter.pos = -1;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);
		}
		touch_nmi_watchdog();

		trace_printk_seq(&iter.seq);
	}

	if (!cnt)
		printk(KERN_TRACE " (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

 out_enable:
	tr->trace_flags |= old_userobj;

	for_each_tracing_cpu(cpu) {
		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
	}
	atomic_dec(&dump_running);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ftrace_dump);
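
/*
 * Hedged, illustrative sketch only -- nothing in this file calls it.
 * Because ftrace_dump() is exported, a debugging patch elsewhere could
 * dump the whole trace to the console when it detects a rare bad state.
 * "hit_bad_state" is a made-up stand-in for whatever condition is being
 * chased.
 */
static inline void example_ftrace_dump_on_bad_state(bool hit_bad_state)
{
	if (unlikely(hit_bad_state))
		ftrace_dump(DUMP_ALL);

	/* Tracing stays off after a dump (see tracing_off() above). */
}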

__init static int tracer_alloc_buffers(void)
{
	int ring_buf_size;
	int ret = -ENOMEM;

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		goto out;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();
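
	/*
	 * Why the check above works (descriptive note): each trace_printk()
	 * call site emits a pointer to its format string into a dedicated
	 * linker section, and __start/__stop___trace_bprintk_fmt bracket
	 * that section.  The two symbols differing therefore means at least
	 * one trace_printk() user was built in, so the extra buffers are
	 * worth allocating up front.
	 */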

	/* To save memory, keep the ring buffer size to its minimum */
	if (ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/* Used for event triggers */
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_free_cpumask;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	/* TODO: make the number of buffers hot pluggable with CPUs */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
		WARN_ON(1);
		goto out_free_savedcmd;
	}

	if (global_trace.buffer_disabled)
		tracing_off();

	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}
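
	/*
	 * Note (hedged): trace_boot_clock above is filled in from the
	 * "trace_clock=" boot parameter, so booting with e.g.
	 * "trace_clock=global" switches the clock here, before any tracer
	 * has had a chance to run.
	 */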

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	ftrace_init_global_array_ops(&global_trace);

	init_trace_flags_index(&global_trace);

	register_tracer(&nop_trace);

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

out_free_savedcmd:
	free_saved_cmdlines_buffer(savedcmd);
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
out:
	return ret;
}
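
/*
 * Descriptive note on the error path above: it follows the usual kernel
 * goto-ladder idiom.  Resources are released in the reverse order of
 * their allocation, and each failure point jumps to the label that frees
 * everything acquired up to that point and nothing more.
 */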

void __init trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter =
			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
		if (WARN_ON(!tracepoint_print_iter))
			tracepoint_printk = 0;
	}
	tracer_alloc_buffers();
	trace_event_init();
}
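
/*
 * Note (hedged): trace_init() is invoked early from start_kernel(), long
 * before tracefs is available; the tracefs side of the setup is finished
 * later by tracer_init_tracefs() via the fs_initcall() below.
 */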

__init static int clear_boot_tracer(void)
{
	/*
	 * The buffer holding the name of the default bootup tracer lives
	 * in an init section.  This function runs as a late initcall: if
	 * the requested boot tracer was never registered, clear the
	 * pointer so that later tracer registrations do not compare
	 * against a buffer that is about to be freed.
	 */
	if (!default_bootup_tracer)
		return 0;

	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
	       default_bootup_tracer);
	default_bootup_tracer = NULL;

	return 0;
}
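
/*
 * Illustrative: default_bootup_tracer is set from the "ftrace=" boot
 * parameter (e.g. booting with "ftrace=function_graph"), and the string
 * it points at lives in init memory, which is why the pointer must not
 * be used once that memory has been freed.
 */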

fs_initcall(tracer_init_tracefs);
late_initcall_sync(clear_boot_tracer);
