1 /*
2 * ring buffer based function tracer
3 *
4 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Originally taken from the RT patch by:
8 * Arnaldo Carvalho de Melo <acme@redhat.com>
9 *
10 * Based on code from the latency_tracer, that is:
11 * Copyright (C) 2004-2006 Ingo Molnar
12 * Copyright (C) 2004 Nadia Yvette Chambers
13 */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45
46 #include "trace.h"
47 #include "trace_output.h"
48
49 /*
50 * On boot up, the ring buffer is set to the minimum size, so that
51 * we do not waste memory on systems that are not using tracing.
52 */
53 bool ring_buffer_expanded;
54
55 /*
56 * We need to change this state when a selftest is running.
57 * A selftest will look into the ring buffer to count the
58 * entries inserted during the selftest, but concurrent
59 * insertions into the ring buffer, such as trace_printk, could occur
60 * at the same time, giving false positive or negative results.
61 */
62 static bool __read_mostly tracing_selftest_running;
63
64 /*
65 * If a tracer is running, we do not want to run SELFTEST.
66 */
67 bool __read_mostly tracing_selftest_disabled;
68
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76 { }
77 };
78
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82 return 0;
83 }
84
85 /*
86 * To prevent the comm cache from being overwritten when no
87 * tracing is active, only save the comm when a trace event
88 * occurred.
89 */
90 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
91
92 /*
93 * Kill all tracing for good (never come back).
94 * It is initialized to 1, and is set back to zero only when the
95 * initialization of the tracer succeeds. Nothing else ever clears
96 * it.
97 */
98 static int tracing_disabled = 1;
99
100 cpumask_var_t __read_mostly tracing_buffer_mask;
101
102 /*
103 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104 *
105 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106 * is set, then ftrace_dump is called. This will output the contents
107 * of the ftrace buffers to the console. This is very useful for
108 * capturing traces that lead to crashes and outputting them to a
109 * serial console.
110 *
111 * It is off by default, but you can enable it either by specifying
112 * "ftrace_dump_on_oops" on the kernel command line, or by setting
113 * /proc/sys/kernel/ftrace_dump_on_oops.
114 * Set it to 1 to dump the buffers of all CPUs.
115 * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
116 */
117
118 enum ftrace_dump_mode ftrace_dump_on_oops;
119
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122
123 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
124 /* Map of enums to their values, for "eval_map" file */
125 struct trace_eval_map_head {
126 struct module *mod;
127 unsigned long length;
128 };
129
130 union trace_eval_map_item;
131
132 struct trace_eval_map_tail {
133 /*
134 * "end" is first and points to NULL as it must be different
135 * than "mod" or "eval_string"
136 */
137 union trace_eval_map_item *next;
138 const char *end; /* points to NULL */
139 };
140
141 static DEFINE_MUTEX(trace_eval_mutex);
142
143 /*
144 * The trace_eval_maps are saved in an array with two extra elements,
145 * one at the beginning, and one at the end. The beginning item contains
146 * the count of the saved maps (head.length), and the module they
147 * belong to if not built in (head.mod). The ending item contains a
148 * pointer to the next array of saved eval_map items.
149 */
150 union trace_eval_map_item {
151 struct trace_eval_map map;
152 struct trace_eval_map_head head;
153 struct trace_eval_map_tail tail;
154 };
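/*
 * Illustrative layout (not real data) of one saved array, assuming it
 * holds three eval maps that came from a module:
 *
 *   item[0].head -> { .mod = <owning module>, .length = 3 }
 *   item[1..3]   -> the actual struct trace_eval_map entries
 *   item[4].tail -> { .next = <next saved array or NULL>, .end = NULL }
 *
 * Walking the saved maps therefore means skipping the head item,
 * iterating head.length map items, then following tail.next.
 */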
155
156 static union trace_eval_map_item *trace_eval_maps;
157 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
158
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160
161 #define MAX_TRACER_SIZE 100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164
165 static bool allocate_snapshot;
166
167 static int __init set_cmdline_ftrace(char *str)
168 {
169 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170 default_bootup_tracer = bootup_tracer_buf;
171 /* We are using ftrace early, expand it */
172 ring_buffer_expanded = true;
173 return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179 if (*str++ != '=' || !*str) {
180 ftrace_dump_on_oops = DUMP_ALL;
181 return 1;
182 }
183
184 if (!strcmp("orig_cpu", str)) {
185 ftrace_dump_on_oops = DUMP_ORIG;
186 return 1;
187 }
188
189 return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192
193 static int __init stop_trace_on_warning(char *str)
194 {
195 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196 __disable_trace_on_warning = 1;
197 return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200
201 static int __init boot_alloc_snapshot(char *str)
202 {
203 allocate_snapshot = true;
204 /* We also need the main ring buffer expanded */
205 ring_buffer_expanded = true;
206 return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209
210
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212
213 static int __init set_trace_boot_options(char *str)
214 {
215 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216 return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222
223 static int __init set_trace_boot_clock(char *str)
224 {
225 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226 trace_boot_clock = trace_boot_clock_buf;
227 return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230
231 static int __init set_tracepoint_printk(char *str)
232 {
233 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234 tracepoint_printk = 1;
235 return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
238
239 unsigned long long ns2usecs(u64 nsec)
240 {
241 nsec += 500;
242 do_div(nsec, 1000);
243 return nsec;
244 }
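/*
 * Note that ns2usecs() rounds to the nearest microsecond rather than
 * truncating: adding 500 before the divide means, for example, that
 * ns2usecs(1499) == 1 while ns2usecs(1500) == 2.
 */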
245
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS \
248 (FUNCTION_DEFAULT_FLAGS | \
249 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
250 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
251 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
252 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
256 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
261
262 /*
263 * The global_trace is the descriptor that holds the top-level tracing
264 * buffers for the live tracing.
265 */
266 static struct trace_array global_trace = {
267 .trace_flags = TRACE_DEFAULT_FLAGS,
268 };
269
270 LIST_HEAD(ftrace_trace_arrays);
271
272 int trace_array_get(struct trace_array *this_tr)
273 {
274 struct trace_array *tr;
275 int ret = -ENODEV;
276
277 mutex_lock(&trace_types_lock);
278 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
279 if (tr == this_tr) {
280 tr->ref++;
281 ret = 0;
282 break;
283 }
284 }
285 mutex_unlock(&trace_types_lock);
286
287 return ret;
288 }
289
290 static void __trace_array_put(struct trace_array *this_tr)
291 {
292 WARN_ON(!this_tr->ref);
293 this_tr->ref--;
294 }
295
296 void trace_array_put(struct trace_array *this_tr)
297 {
298 mutex_lock(&trace_types_lock);
299 __trace_array_put(this_tr);
300 mutex_unlock(&trace_types_lock);
301 }
302
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304 struct ring_buffer *buffer,
305 struct ring_buffer_event *event)
306 {
307 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308 !filter_match_preds(call->filter, rec)) {
309 __trace_event_discard_commit(buffer, event);
310 return 1;
311 }
312
313 return 0;
314 }
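/*
 * Sketch of the typical calling pattern in event tracing code (the
 * surrounding helpers are illustrative; only the discard check is the
 * point here): reserve an event, fill it in, and commit it only if the
 * filter did not discard it.
 *
 *	entry = ring_buffer_event_data(event);
 *	entry->field = value;
 *	if (!call_filter_check_discard(call, entry, buffer, event))
 *		trace_buffer_unlock_commit(tr, buffer, event, flags, pc);
 */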
315
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
317 {
318 vfree(pid_list->pids);
319 kfree(pid_list);
320 }
321
322 /**
323 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324 * @filtered_pids: The list of pids to check
325 * @search_pid: The PID to find in @filtered_pids
326 *
327 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
328 */
329 bool
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
331 {
332 /*
333 * If pid_max changed after filtered_pids was created, we
334 * by default ignore all pids greater than the previous pid_max.
335 */
336 if (search_pid >= filtered_pids->pid_max)
337 return false;
338
339 return test_bit(search_pid, filtered_pids->pids);
340 }
341
342 /**
343 * trace_ignore_this_task - should a task be ignored for tracing
344 * @filtered_pids: The list of pids to check
345 * @task: The task that should be ignored if not filtered
346 *
347 * Checks if @task should be traced or not from @filtered_pids.
348 * Returns true if @task should *NOT* be traced.
349 * Returns false if @task should be traced.
350 */
351 bool
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
353 {
354 /*
355 * Return false, because if filtered_pids does not exist,
356 * all pids are good to trace.
357 */
358 if (!filtered_pids)
359 return false;
360
361 return !trace_find_filtered_pid(filtered_pids, task->pid);
362 }
363
364 /**
365 * trace_pid_filter_add_remove - Add or remove a task from a pid_list
366 * @pid_list: The list to modify
367 * @self: The current task for fork or NULL for exit
368 * @task: The task to add or remove
369 *
370 * When adding a task, if @self is defined, the task is only added if @self
371 * is also included in @pid_list. This happens on fork and tasks should
372 * only be added when the parent is listed. If @self is NULL, then the
373 * @task pid will be removed from the list, which would happen on exit
374 * of a task.
375 */
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377 struct task_struct *self,
378 struct task_struct *task)
379 {
380 if (!pid_list)
381 return;
382
383 /* For forks, we only add if the forking task is listed */
384 if (self) {
385 if (!trace_find_filtered_pid(pid_list, self->pid))
386 return;
387 }
388
389 /* Sorry, but we don't support pid_max changing after setting */
390 if (task->pid >= pid_list->pid_max)
391 return;
392
393 /* "self" is set for forks, and NULL for exits */
394 if (self)
395 set_bit(task->pid, pid_list->pids);
396 else
397 clear_bit(task->pid, pid_list->pids);
398 }
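/*
 * This is intended to be driven from fork and exit handlers (for
 * example, the sched_process_fork and sched_process_exit tracepoint
 * probes used by the event pid filtering code): on fork, @self is the
 * forking parent, so the child is added only when the parent is
 * already filtered; on exit, @self is NULL and the exiting task's pid
 * is simply cleared from the list.
 */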
399
400 /**
401 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402 * @pid_list: The pid list to show
403 * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
404 * @pos: The position of the file
405 *
406 * This is used by the seq_file "next" operation to iterate the pids
407 * listed in a trace_pid_list structure.
408 *
409 * Returns the pid+1 as we want to display pid of zero, but NULL would
410 * stop the iteration.
411 */
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
413 {
414 unsigned long pid = (unsigned long)v;
415
416 (*pos)++;
417
418 /* pid is already +1 of the actual previous bit */
419 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
420
421 /* Return pid + 1 to allow zero to be represented */
422 if (pid < pid_list->pid_max)
423 return (void *)(pid + 1);
424
425 return NULL;
426 }
427
428 /**
429 * trace_pid_start - Used for seq_file to start reading pid lists
430 * @pid_list: The pid list to show
431 * @pos: The position of the file
432 *
433 * This is used by seq_file "start" operation to start the iteration
434 * of listing pids.
435 *
436 * Returns the pid+1 as we want to display pid of zero, but NULL would
437 * stop the iteration.
438 */
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
440 {
441 unsigned long pid;
442 loff_t l = 0;
443
444 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445 if (pid >= pid_list->pid_max)
446 return NULL;
447
448 /* Return pid + 1 so that zero can be the exit value */
449 for (pid++; pid && l < *pos;
450 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
451 ;
452 return (void *)pid;
453 }
454
455 /**
456 * trace_pid_show - show the current pid in seq_file processing
457 * @m: The seq_file structure to write into
458 * @v: A void pointer of the pid (+1) value to display
459 *
460 * Can be directly used by seq_file operations to display the current
461 * pid value.
462 */
463 int trace_pid_show(struct seq_file *m, void *v)
464 {
465 unsigned long pid = (unsigned long)v - 1;
466
467 seq_printf(m, "%lu\n", pid);
468 return 0;
469 }
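/*
 * Sketch of how the trace_pid_* helpers are typically wired into a
 * struct seq_operations. The ->start/->next/->stop wrappers (which
 * also take whatever lock protects the pid list) are illustrative:
 *
 *	static const struct seq_operations example_pid_seq_ops = {
 *		.start	= example_pid_start,	// calls trace_pid_start()
 *		.next	= example_pid_next,	// calls trace_pid_next()
 *		.stop	= example_pid_stop,
 *		.show	= trace_pid_show,
 *	};
 */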
470
471 /* 128 should be much more than enough */
472 #define PID_BUF_SIZE 127
473
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475 struct trace_pid_list **new_pid_list,
476 const char __user *ubuf, size_t cnt)
477 {
478 struct trace_pid_list *pid_list;
479 struct trace_parser parser;
480 unsigned long val;
481 int nr_pids = 0;
482 ssize_t read = 0;
483 ssize_t ret = 0;
484 loff_t pos;
485 pid_t pid;
486
487 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
488 return -ENOMEM;
489
490 /*
491 * Always create a new array: the write is an all-or-nothing
492 * operation, so when the user adds new pids we build a fresh
493 * array and, if the operation fails, the current list is left
494 * unmodified.
495 */
496 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
497 if (!pid_list) {
498 trace_parser_put(&parser);
499 return -ENOMEM;
500 }
501
502 pid_list->pid_max = READ_ONCE(pid_max);
503
504 /* Only truncating will shrink pid_max */
505 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
506 pid_list->pid_max = filtered_pids->pid_max;
507
508 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
509 if (!pid_list->pids) {
510 trace_parser_put(&parser);
511 kfree(pid_list);
512 return -ENOMEM;
513 }
514
515 if (filtered_pids) {
516 /* copy the current bits to the new max */
517 for_each_set_bit(pid, filtered_pids->pids,
518 filtered_pids->pid_max) {
519 set_bit(pid, pid_list->pids);
520 nr_pids++;
521 }
522 }
523
524 while (cnt > 0) {
525
526 pos = 0;
527
528 ret = trace_get_user(&parser, ubuf, cnt, &pos);
529 if (ret < 0 || !trace_parser_loaded(&parser))
530 break;
531
532 read += ret;
533 ubuf += ret;
534 cnt -= ret;
535
536 parser.buffer[parser.idx] = 0;
537
538 ret = -EINVAL;
539 if (kstrtoul(parser.buffer, 0, &val))
540 break;
541 if (val >= pid_list->pid_max)
542 break;
543
544 pid = (pid_t)val;
545
546 set_bit(pid, pid_list->pids);
547 nr_pids++;
548
549 trace_parser_clear(&parser);
550 ret = 0;
551 }
552 trace_parser_put(&parser);
553
554 if (ret < 0) {
555 trace_free_pid_list(pid_list);
556 return ret;
557 }
558
559 if (!nr_pids) {
560 /* Cleared the list of pids */
561 trace_free_pid_list(pid_list);
562 read = ret;
563 pid_list = NULL;
564 }
565
566 *new_pid_list = pid_list;
567
568 return read;
569 }
570
571 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
572 {
573 u64 ts;
574
575 /* Early boot up does not have a buffer yet */
576 if (!buf->buffer)
577 return trace_clock_local();
578
579 ts = ring_buffer_time_stamp(buf->buffer, cpu);
580 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
581
582 return ts;
583 }
584
585 u64 ftrace_now(int cpu)
586 {
587 return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
588 }
589
590 /**
591 * tracing_is_enabled - Show if global_trace has been disabled
592 *
593 * Shows if the global trace has been enabled or not. It uses the
594 * mirror flag "buffer_disabled" so it can be used in fast paths such
595 * as the irqsoff tracer, but it may be inaccurate due to races. If you
596 * need to know the accurate state, use tracing_is_on(), which is a little
597 * slower but accurate.
598 */
599 int tracing_is_enabled(void)
600 {
601 /*
602 * For quick access (irqsoff uses this in fast path), just
603 * return the mirror variable of the state of the ring buffer.
604 * It's a little racy, but we don't really care.
605 */
606 smp_rmb();
607 return !global_trace.buffer_disabled;
608 }
609
610 /*
611 * trace_buf_size is the size in bytes that is allocated
612 * for a buffer. Note, the number of bytes is always rounded
613 * to page size.
614 *
615 * This number is purposely set to a low value of 16384 so that,
616 * if a dump on oops happens, there is not a huge amount of output
617 * to wait for. In any case it is configurable at both boot time
618 * and run time.
619 */
620 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
621
622 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
623
624 /* trace_types holds a link list of available tracers. */
625 static struct tracer *trace_types __read_mostly;
626
627 /*
628 * trace_types_lock is used to protect the trace_types list.
629 */
630 DEFINE_MUTEX(trace_types_lock);
631
632 /*
633 * serialize access to the ring buffer
634 *
635 * The ring buffer serializes readers, but that is only low-level protection.
636 * The validity of the events (returned by ring_buffer_peek() etc.)
637 * is not protected by the ring buffer.
638 *
639 * The content of events may become garbage if we allow other processes to
640 * consume these events concurrently:
641 * A) the page of the consumed events may become a normal page
642 * (not a reader page) in the ring buffer, and this page will be rewritten
643 * by the event producer.
644 * B) The page of the consumed events may become a page for splice_read,
645 * and this page will be returned to the system.
646 *
647 * These primitives allow multiple processes to access different CPU ring
648 * buffers concurrently.
649 *
650 * These primitives don't distinguish read-only from read-consume access.
651 * Multiple read-only accesses are also serialized.
652 */
653
654 #ifdef CONFIG_SMP
655 static DECLARE_RWSEM(all_cpu_access_lock);
656 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
657
658 static inline void trace_access_lock(int cpu)
659 {
660 if (cpu == RING_BUFFER_ALL_CPUS) {
661 /* gain it for accessing the whole ring buffer. */
662 down_write(&all_cpu_access_lock);
663 } else {
664 /* gain it for accessing a cpu ring buffer. */
665
666 /* First, block other trace_access_lock(RING_BUFFER_ALL_CPUS) callers. */
667 down_read(&all_cpu_access_lock);
668
669 /* Second, block other access to this @cpu ring buffer. */
670 mutex_lock(&per_cpu(cpu_access_lock, cpu));
671 }
672 }
673
674 static inline void trace_access_unlock(int cpu)
675 {
676 if (cpu == RING_BUFFER_ALL_CPUS) {
677 up_write(&all_cpu_access_lock);
678 } else {
679 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
680 up_read(&all_cpu_access_lock);
681 }
682 }
683
684 static inline void trace_access_lock_init(void)
685 {
686 int cpu;
687
688 for_each_possible_cpu(cpu)
689 mutex_init(&per_cpu(cpu_access_lock, cpu));
690 }
691
692 #else
693
694 static DEFINE_MUTEX(access_lock);
695
696 static inline void trace_access_lock(int cpu)
697 {
698 (void)cpu;
699 mutex_lock(&access_lock);
700 }
701
702 static inline void trace_access_unlock(int cpu)
703 {
704 (void)cpu;
705 mutex_unlock(&access_lock);
706 }
707
708 static inline void trace_access_lock_init(void)
709 {
710 }
711
712 #endif
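/*
 * Illustrative use of the access-lock primitives around a consuming
 * read of a single CPU buffer (error handling omitted):
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	...
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS instead takes the lock exclusively, for
 * operations that touch every CPU buffer at once.
 */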
713
714 #ifdef CONFIG_STACKTRACE
715 static void __ftrace_trace_stack(struct ring_buffer *buffer,
716 unsigned long flags,
717 int skip, int pc, struct pt_regs *regs);
718 static inline void ftrace_trace_stack(struct trace_array *tr,
719 struct ring_buffer *buffer,
720 unsigned long flags,
721 int skip, int pc, struct pt_regs *regs);
722
723 #else
724 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
725 unsigned long flags,
726 int skip, int pc, struct pt_regs *regs)
727 {
728 }
729 static inline void ftrace_trace_stack(struct trace_array *tr,
730 struct ring_buffer *buffer,
731 unsigned long flags,
732 int skip, int pc, struct pt_regs *regs)
733 {
734 }
735
736 #endif
737
738 static __always_inline void
739 trace_event_setup(struct ring_buffer_event *event,
740 int type, unsigned long flags, int pc)
741 {
742 struct trace_entry *ent = ring_buffer_event_data(event);
743
744 tracing_generic_entry_update(ent, flags, pc);
745 ent->type = type;
746 }
747
748 static __always_inline struct ring_buffer_event *
749 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
750 int type,
751 unsigned long len,
752 unsigned long flags, int pc)
753 {
754 struct ring_buffer_event *event;
755
756 event = ring_buffer_lock_reserve(buffer, len);
757 if (event != NULL)
758 trace_event_setup(event, type, flags, pc);
759
760 return event;
761 }
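/*
 * Sketch of the reserve/fill/commit pattern this helper supports; it
 * mirrors what __trace_puts() below actually does:
 *
 *	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, len,
 *					    irq_flags, pc);
 *	if (!event)
 *		return 0;		// buffer full or recording disabled
 *	entry = ring_buffer_event_data(event);
 *	... fill in *entry ...
 *	__buffer_unlock_commit(buffer, event);
 */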
762
763 void tracer_tracing_on(struct trace_array *tr)
764 {
765 if (tr->trace_buffer.buffer)
766 ring_buffer_record_on(tr->trace_buffer.buffer);
767 /*
768 * This flag is looked at when buffers haven't been allocated
769 * yet, or by some tracers (like irqsoff), that just want to
770 * know if the ring buffer has been disabled, but it can handle
771 * races of where it gets disabled but we still do a record.
772 * As the check is in the fast path of the tracers, it is more
773 * important to be fast than accurate.
774 */
775 tr->buffer_disabled = 0;
776 /* Make the flag seen by readers */
777 smp_wmb();
778 }
779
780 /**
781 * tracing_on - enable tracing buffers
782 *
783 * This function enables tracing buffers that may have been
784 * disabled with tracing_off.
785 */
786 void tracing_on(void)
787 {
788 tracer_tracing_on(&global_trace);
789 }
790 EXPORT_SYMBOL_GPL(tracing_on);
791
792
793 static __always_inline void
794 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
795 {
796 __this_cpu_write(trace_taskinfo_save, true);
797
798 /* If this is the temp buffer, we need to commit fully */
799 if (this_cpu_read(trace_buffered_event) == event) {
800 /* Length is in event->array[0] */
801 ring_buffer_write(buffer, event->array[0], &event->array[1]);
802 /* Release the temp buffer */
803 this_cpu_dec(trace_buffered_event_cnt);
804 } else
805 ring_buffer_unlock_commit(buffer, event);
806 }
807
808 /**
809 * __trace_puts - write a constant string into the trace buffer.
810 * @ip: The address of the caller
811 * @str: The constant string to write
812 * @size: The size of the string.
813 */
814 int __trace_puts(unsigned long ip, const char *str, int size)
815 {
816 struct ring_buffer_event *event;
817 struct ring_buffer *buffer;
818 struct print_entry *entry;
819 unsigned long irq_flags;
820 int alloc;
821 int pc;
822
823 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
824 return 0;
825
826 pc = preempt_count();
827
828 if (unlikely(tracing_selftest_running || tracing_disabled))
829 return 0;
830
831 alloc = sizeof(*entry) + size + 2; /* possible \n added */
832
833 local_save_flags(irq_flags);
834 buffer = global_trace.trace_buffer.buffer;
835 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
836 irq_flags, pc);
837 if (!event)
838 return 0;
839
840 entry = ring_buffer_event_data(event);
841 entry->ip = ip;
842
843 memcpy(&entry->buf, str, size);
844
845 /* Add a newline if necessary */
846 if (entry->buf[size - 1] != '\n') {
847 entry->buf[size] = '\n';
848 entry->buf[size + 1] = '\0';
849 } else
850 entry->buf[size] = '\0';
851
852 __buffer_unlock_commit(buffer, event);
853 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
854
855 return size;
856 }
857 EXPORT_SYMBOL_GPL(__trace_puts);
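/*
 * Callers normally use the trace_puts() macro rather than calling
 * __trace_puts() directly, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 *
 * The macro selects __trace_bputs() when the string is a compile-time
 * constant and falls back to __trace_puts(_THIS_IP_, str, strlen(str))
 * otherwise (see the macro definition for the exact rules).
 */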
858
859 /**
860 * __trace_bputs - write the pointer to a constant string into trace buffer
861 * @ip: The address of the caller
862 * @str: The constant string to write to the buffer to
863 */
864 int __trace_bputs(unsigned long ip, const char *str)
865 {
866 struct ring_buffer_event *event;
867 struct ring_buffer *buffer;
868 struct bputs_entry *entry;
869 unsigned long irq_flags;
870 int size = sizeof(struct bputs_entry);
871 int pc;
872
873 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
874 return 0;
875
876 pc = preempt_count();
877
878 if (unlikely(tracing_selftest_running || tracing_disabled))
879 return 0;
880
881 local_save_flags(irq_flags);
882 buffer = global_trace.trace_buffer.buffer;
883 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
884 irq_flags, pc);
885 if (!event)
886 return 0;
887
888 entry = ring_buffer_event_data(event);
889 entry->ip = ip;
890 entry->str = str;
891
892 __buffer_unlock_commit(buffer, event);
893 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
894
895 return 1;
896 }
897 EXPORT_SYMBOL_GPL(__trace_bputs);
898
899 #ifdef CONFIG_TRACER_SNAPSHOT
900 void tracing_snapshot_instance(struct trace_array *tr)
901 {
902 struct tracer *tracer = tr->current_trace;
903 unsigned long flags;
904
905 if (in_nmi()) {
906 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
907 internal_trace_puts("*** snapshot is being ignored ***\n");
908 return;
909 }
910
911 if (!tr->allocated_snapshot) {
912 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
913 internal_trace_puts("*** stopping trace here! ***\n");
914 tracing_off();
915 return;
916 }
917
918 /* Note, snapshot can not be used when the tracer uses it */
919 if (tracer->use_max_tr) {
920 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
921 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
922 return;
923 }
924
925 local_irq_save(flags);
926 update_max_tr(tr, current, smp_processor_id());
927 local_irq_restore(flags);
928 }
929
930 /**
931 * tracing_snapshot - take a snapshot of the current buffer.
932 *
933 * This causes a swap between the snapshot buffer and the current live
934 * tracing buffer. You can use this to take snapshots of the live
935 * trace when some condition is triggered, but continue to trace.
936 *
937 * Note, make sure to allocate the snapshot with either
938 * a tracing_snapshot_alloc(), or by doing it manually
939 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
940 *
941 * If the snapshot buffer is not allocated, it will stop tracing.
942 * Basically making a permanent snapshot.
943 */
944 void tracing_snapshot(void)
945 {
946 struct trace_array *tr = &global_trace;
947
948 tracing_snapshot_instance(tr);
949 }
950 EXPORT_SYMBOL_GPL(tracing_snapshot);
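/*
 * Typical in-kernel usage, assuming the setup path may sleep while the
 * trigger site may not (the condition below is purely illustrative):
 *
 *	tracing_alloc_snapshot();	// allocate the spare buffer once
 *	...
 *	if (interesting_condition)
 *		tracing_snapshot();	// swap buffers; OK outside NMI
 */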
951
952 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
953 struct trace_buffer *size_buf, int cpu_id);
954 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
955
956 int tracing_alloc_snapshot_instance(struct trace_array *tr)
957 {
958 int ret;
959
960 if (!tr->allocated_snapshot) {
961
962 /* allocate spare buffer */
963 ret = resize_buffer_duplicate_size(&tr->max_buffer,
964 &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
965 if (ret < 0)
966 return ret;
967
968 tr->allocated_snapshot = true;
969 }
970
971 return 0;
972 }
973
974 static void free_snapshot(struct trace_array *tr)
975 {
976 /*
977 * We don't free the ring buffer; instead, we resize it because
978 * the max_tr ring buffer has some state (e.g. ring->clock) and
979 * we want to preserve it.
980 */
981 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
982 set_buffer_entries(&tr->max_buffer, 1);
983 tracing_reset_online_cpus(&tr->max_buffer);
984 tr->allocated_snapshot = false;
985 }
986
987 /**
988 * tracing_alloc_snapshot - allocate snapshot buffer.
989 *
990 * This only allocates the snapshot buffer if it isn't already
991 * allocated - it doesn't also take a snapshot.
992 *
993 * This is meant to be used in cases where the snapshot buffer needs
994 * to be set up for events that can't sleep but need to be able to
995 * trigger a snapshot.
996 */
997 int tracing_alloc_snapshot(void)
998 {
999 struct trace_array *tr = &global_trace;
1000 int ret;
1001
1002 ret = tracing_alloc_snapshot_instance(tr);
1003 WARN_ON(ret < 0);
1004
1005 return ret;
1006 }
1007 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1008
1009 /**
1010 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1011 *
1012 * This is similar to tracing_snapshot(), but it will allocate the
1013 * snapshot buffer if it isn't already allocated. Use this only
1014 * where it is safe to sleep, as the allocation may sleep.
1015 *
1016 * This causes a swap between the snapshot buffer and the current live
1017 * tracing buffer. You can use this to take snapshots of the live
1018 * trace when some condition is triggered, but continue to trace.
1019 */
1020 void tracing_snapshot_alloc(void)
1021 {
1022 int ret;
1023
1024 ret = tracing_alloc_snapshot();
1025 if (ret < 0)
1026 return;
1027
1028 tracing_snapshot();
1029 }
1030 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1031 #else
1032 void tracing_snapshot(void)
1033 {
1034 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1035 }
1036 EXPORT_SYMBOL_GPL(tracing_snapshot);
1037 int tracing_alloc_snapshot(void)
1038 {
1039 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1040 return -ENODEV;
1041 }
1042 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1043 void tracing_snapshot_alloc(void)
1044 {
1045 /* Give warning */
1046 tracing_snapshot();
1047 }
1048 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1049 #endif /* CONFIG_TRACER_SNAPSHOT */
1050
1051 void tracer_tracing_off(struct trace_array *tr)
1052 {
1053 if (tr->trace_buffer.buffer)
1054 ring_buffer_record_off(tr->trace_buffer.buffer);
1055 /*
1056 * This flag is looked at when buffers haven't been allocated
1057 * yet, or by some tracers (like irqsoff), that just want to
1058 * know if the ring buffer has been disabled, but it can handle
1059 * races of where it gets disabled but we still do a record.
1060 * As the check is in the fast path of the tracers, it is more
1061 * important to be fast than accurate.
1062 */
1063 tr->buffer_disabled = 1;
1064 /* Make the flag seen by readers */
1065 smp_wmb();
1066 }
1067
1068 /**
1069 * tracing_off - turn off tracing buffers
1070 *
1071 * This function stops the tracing buffers from recording data.
1072 * It does not disable any overhead the tracers themselves may
1073 * be causing. This function simply causes all recording to
1074 * the ring buffers to fail.
1075 */
1076 void tracing_off(void)
1077 {
1078 tracer_tracing_off(&global_trace);
1079 }
1080 EXPORT_SYMBOL_GPL(tracing_off);
1081
1082 void disable_trace_on_warning(void)
1083 {
1084 if (__disable_trace_on_warning)
1085 tracing_off();
1086 }
1087
1088 /**
1089 * tracer_tracing_is_on - show real state of ring buffer enabled
1090 * @tr : the trace array to know if ring buffer is enabled
1091 *
1092 * Shows real state of the ring buffer if it is enabled or not.
1093 */
1094 int tracer_tracing_is_on(struct trace_array *tr)
1095 {
1096 if (tr->trace_buffer.buffer)
1097 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1098 return !tr->buffer_disabled;
1099 }
1100
1101 /**
1102 * tracing_is_on - show state of ring buffers enabled
1103 */
1104 int tracing_is_on(void)
1105 {
1106 return tracer_tracing_is_on(&global_trace);
1107 }
1108 EXPORT_SYMBOL_GPL(tracing_is_on);
1109
1110 static int __init set_buf_size(char *str)
1111 {
1112 unsigned long buf_size;
1113
1114 if (!str)
1115 return 0;
1116 buf_size = memparse(str, &str);
1117 /* nr_entries can not be zero */
1118 if (buf_size == 0)
1119 return 0;
1120 trace_buf_size = buf_size;
1121 return 1;
1122 }
1123 __setup("trace_buf_size=", set_buf_size);
1124
1125 static int __init set_tracing_thresh(char *str)
1126 {
1127 unsigned long threshold;
1128 int ret;
1129
1130 if (!str)
1131 return 0;
1132 ret = kstrtoul(str, 0, &threshold);
1133 if (ret < 0)
1134 return 0;
1135 tracing_thresh = threshold * 1000;
1136 return 1;
1137 }
1138 __setup("tracing_thresh=", set_tracing_thresh);
1139
1140 unsigned long nsecs_to_usecs(unsigned long nsecs)
1141 {
1142 return nsecs / 1000;
1143 }
1144
1145 /*
1146 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1147 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1148 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1149 * of strings in the order that the evals (enum) were defined.
1150 */
1151 #undef C
1152 #define C(a, b) b
1153
1154 /* These must match the bit positions in trace_iterator_flags */
1155 static const char *trace_options[] = {
1156 TRACE_FLAGS
1157 NULL
1158 };
1159
1160 static struct {
1161 u64 (*func)(void);
1162 const char *name;
1163 int in_ns; /* is this clock in nanoseconds? */
1164 } trace_clocks[] = {
1165 { trace_clock_local, "local", 1 },
1166 { trace_clock_global, "global", 1 },
1167 { trace_clock_counter, "counter", 0 },
1168 { trace_clock_jiffies, "uptime", 0 },
1169 { trace_clock, "perf", 1 },
1170 { ktime_get_mono_fast_ns, "mono", 1 },
1171 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1172 { ktime_get_boot_fast_ns, "boot", 1 },
1173 ARCH_TRACE_CLOCKS
1174 };
1175
1176 /*
1177 * trace_parser_get_init - gets the buffer for trace parser
1178 */
1179 int trace_parser_get_init(struct trace_parser *parser, int size)
1180 {
1181 memset(parser, 0, sizeof(*parser));
1182
1183 parser->buffer = kmalloc(size, GFP_KERNEL);
1184 if (!parser->buffer)
1185 return 1;
1186
1187 parser->size = size;
1188 return 0;
1189 }
1190
1191 /*
1192 * trace_parser_put - frees the buffer for trace parser
1193 */
1194 void trace_parser_put(struct trace_parser *parser)
1195 {
1196 kfree(parser->buffer);
1197 parser->buffer = NULL;
1198 }
1199
1200 /*
1201 * trace_get_user - reads the user input string separated by space
1202 * (matched by isspace(ch))
1203 *
1204 * For each string found the 'struct trace_parser' is updated,
1205 * and the function returns.
1206 *
1207 * Returns number of bytes read.
1208 *
1209 * See kernel/trace/trace.h for 'struct trace_parser' details.
1210 */
1211 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1212 size_t cnt, loff_t *ppos)
1213 {
1214 char ch;
1215 size_t read = 0;
1216 ssize_t ret;
1217
1218 if (!*ppos)
1219 trace_parser_clear(parser);
1220
1221 ret = get_user(ch, ubuf++);
1222 if (ret)
1223 goto out;
1224
1225 read++;
1226 cnt--;
1227
1228 /*
1229 * If the parser did not finish with the last write,
1230 * continue reading the user input without skipping spaces.
1231 */
1232 if (!parser->cont) {
1233 /* skip white space */
1234 while (cnt && isspace(ch)) {
1235 ret = get_user(ch, ubuf++);
1236 if (ret)
1237 goto out;
1238 read++;
1239 cnt--;
1240 }
1241
1242 /* only spaces were written */
1243 if (isspace(ch)) {
1244 *ppos += read;
1245 ret = read;
1246 goto out;
1247 }
1248
1249 parser->idx = 0;
1250 }
1251
1252 /* read the non-space input */
1253 while (cnt && !isspace(ch)) {
1254 if (parser->idx < parser->size - 1)
1255 parser->buffer[parser->idx++] = ch;
1256 else {
1257 ret = -EINVAL;
1258 goto out;
1259 }
1260 ret = get_user(ch, ubuf++);
1261 if (ret)
1262 goto out;
1263 read++;
1264 cnt--;
1265 }
1266
1267 /* We either got finished input or we have to wait for another call. */
1268 if (isspace(ch)) {
1269 parser->buffer[parser->idx] = 0;
1270 parser->cont = false;
1271 } else if (parser->idx < parser->size - 1) {
1272 parser->cont = true;
1273 parser->buffer[parser->idx++] = ch;
1274 } else {
1275 ret = -EINVAL;
1276 goto out;
1277 }
1278
1279 *ppos += read;
1280 ret = read;
1281
1282 out:
1283 return ret;
1284 }
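/*
 * Sketch of the usual calling pattern (trace_pid_write() above is a
 * real example): keep pulling space-separated tokens until the user
 * buffer is drained or the parser has nothing loaded.
 *
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		ubuf += ret;
 *		cnt -= ret;
 *		... consume parser.buffer ...
 *		trace_parser_clear(&parser);
 *	}
 */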
1285
1286 /* TODO add a seq_buf_to_buffer() */
1287 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1288 {
1289 int len;
1290
1291 if (trace_seq_used(s) <= s->seq.readpos)
1292 return -EBUSY;
1293
1294 len = trace_seq_used(s) - s->seq.readpos;
1295 if (cnt > len)
1296 cnt = len;
1297 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1298
1299 s->seq.readpos += cnt;
1300 return cnt;
1301 }
1302
1303 unsigned long __read_mostly tracing_thresh;
1304
1305 #ifdef CONFIG_TRACER_MAX_TRACE
1306 /*
1307 * Copy the new maximum trace into the separate maximum-trace
1308 * structure. (this way the maximum trace is permanently saved,
1309 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1310 */
1311 static void
1312 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1313 {
1314 struct trace_buffer *trace_buf = &tr->trace_buffer;
1315 struct trace_buffer *max_buf = &tr->max_buffer;
1316 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1317 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1318
1319 max_buf->cpu = cpu;
1320 max_buf->time_start = data->preempt_timestamp;
1321
1322 max_data->saved_latency = tr->max_latency;
1323 max_data->critical_start = data->critical_start;
1324 max_data->critical_end = data->critical_end;
1325
1326 memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1327 max_data->pid = tsk->pid;
1328 /*
1329 * If tsk == current, then use current_uid(), as that does not use
1330 * RCU. The irq tracer can be called out of RCU scope.
1331 */
1332 if (tsk == current)
1333 max_data->uid = current_uid();
1334 else
1335 max_data->uid = task_uid(tsk);
1336
1337 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1338 max_data->policy = tsk->policy;
1339 max_data->rt_priority = tsk->rt_priority;
1340
1341 /* record this task's comm */
1342 tracing_record_cmdline(tsk);
1343 }
1344
1345 /**
1346 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1347 * @tr: tracer
1348 * @tsk: the task with the latency
1349 * @cpu: The cpu that initiated the trace.
1350 *
1351 * Flip the buffers between the @tr and the max_tr and record information
1352 * about which task was the cause of this latency.
1353 */
1354 void
1355 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1356 {
1357 struct ring_buffer *buf;
1358
1359 if (tr->stop_count)
1360 return;
1361
1362 WARN_ON_ONCE(!irqs_disabled());
1363
1364 if (!tr->allocated_snapshot) {
1365 /* Only the nop tracer should hit this when disabling */
1366 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1367 return;
1368 }
1369
1370 arch_spin_lock(&tr->max_lock);
1371
1372 /* Inherit the recordable setting from trace_buffer */
1373 if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1374 ring_buffer_record_on(tr->max_buffer.buffer);
1375 else
1376 ring_buffer_record_off(tr->max_buffer.buffer);
1377
1378 buf = tr->trace_buffer.buffer;
1379 tr->trace_buffer.buffer = tr->max_buffer.buffer;
1380 tr->max_buffer.buffer = buf;
1381
1382 __update_max_tr(tr, tsk, cpu);
1383 arch_spin_unlock(&tr->max_lock);
1384 }
1385
1386 /**
1387 * update_max_tr_single - only copy one trace over, and reset the rest
1388 * @tr: tracer
1389 * @tsk: task with the latency
1390 * @cpu: the cpu of the buffer to copy.
1391 *
1392 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1393 */
1394 void
1395 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1396 {
1397 int ret;
1398
1399 if (tr->stop_count)
1400 return;
1401
1402 WARN_ON_ONCE(!irqs_disabled());
1403 if (!tr->allocated_snapshot) {
1404 /* Only the nop tracer should hit this when disabling */
1405 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1406 return;
1407 }
1408
1409 arch_spin_lock(&tr->max_lock);
1410
1411 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1412
1413 if (ret == -EBUSY) {
1414 /*
1415 * We failed to swap the buffer due to a commit taking
1416 * place on this CPU. We fail to record, but we reset
1417 * the max trace buffer (no one writes directly to it)
1418 * and flag that it failed.
1419 */
1420 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1421 "Failed to swap buffers due to commit in progress\n");
1422 }
1423
1424 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1425
1426 __update_max_tr(tr, tsk, cpu);
1427 arch_spin_unlock(&tr->max_lock);
1428 }
1429 #endif /* CONFIG_TRACER_MAX_TRACE */
1430
1431 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1432 {
1433 /* Iterators are static, they should be filled or empty */
1434 if (trace_buffer_iter(iter, iter->cpu_file))
1435 return 0;
1436
1437 return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1438 full);
1439 }
1440
1441 #ifdef CONFIG_FTRACE_STARTUP_TEST
1442 static bool selftests_can_run;
1443
1444 struct trace_selftests {
1445 struct list_head list;
1446 struct tracer *type;
1447 };
1448
1449 static LIST_HEAD(postponed_selftests);
1450
1451 static int save_selftest(struct tracer *type)
1452 {
1453 struct trace_selftests *selftest;
1454
1455 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1456 if (!selftest)
1457 return -ENOMEM;
1458
1459 selftest->type = type;
1460 list_add(&selftest->list, &postponed_selftests);
1461 return 0;
1462 }
1463
1464 static int run_tracer_selftest(struct tracer *type)
1465 {
1466 struct trace_array *tr = &global_trace;
1467 struct tracer *saved_tracer = tr->current_trace;
1468 int ret;
1469
1470 if (!type->selftest || tracing_selftest_disabled)
1471 return 0;
1472
1473 /*
1474 * If a tracer registers early in boot up (before scheduling is
1475 * initialized and such), then do not run its selftests yet.
1476 * Instead, run them a little later in the boot process.
1477 */
1478 if (!selftests_can_run)
1479 return save_selftest(type);
1480
1481 /*
1482 * Run a selftest on this tracer.
1483 * Here we reset the trace buffer, and set the current
1484 * tracer to be this tracer. The tracer can then run some
1485 * internal tracing to verify that everything is in order.
1486 * If we fail, we do not register this tracer.
1487 */
1488 tracing_reset_online_cpus(&tr->trace_buffer);
1489
1490 tr->current_trace = type;
1491
1492 #ifdef CONFIG_TRACER_MAX_TRACE
1493 if (type->use_max_tr) {
1494 /* If we expanded the buffers, make sure the max is expanded too */
1495 if (ring_buffer_expanded)
1496 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1497 RING_BUFFER_ALL_CPUS);
1498 tr->allocated_snapshot = true;
1499 }
1500 #endif
1501
1502 /* the test is responsible for initializing and enabling */
1503 pr_info("Testing tracer %s: ", type->name);
1504 ret = type->selftest(type, tr);
1505 /* the test is responsible for resetting too */
1506 tr->current_trace = saved_tracer;
1507 if (ret) {
1508 printk(KERN_CONT "FAILED!\n");
1509 /* Add the warning after printing 'FAILED' */
1510 WARN_ON(1);
1511 return -1;
1512 }
1513 /* Only reset on passing, to avoid touching corrupted buffers */
1514 tracing_reset_online_cpus(&tr->trace_buffer);
1515
1516 #ifdef CONFIG_TRACER_MAX_TRACE
1517 if (type->use_max_tr) {
1518 tr->allocated_snapshot = false;
1519
1520 /* Shrink the max buffer again */
1521 if (ring_buffer_expanded)
1522 ring_buffer_resize(tr->max_buffer.buffer, 1,
1523 RING_BUFFER_ALL_CPUS);
1524 }
1525 #endif
1526
1527 printk(KERN_CONT "PASSED\n");
1528 return 0;
1529 }
1530
1531 static __init int init_trace_selftests(void)
1532 {
1533 struct trace_selftests *p, *n;
1534 struct tracer *t, **last;
1535 int ret;
1536
1537 selftests_can_run = true;
1538
1539 mutex_lock(&trace_types_lock);
1540
1541 if (list_empty(&postponed_selftests))
1542 goto out;
1543
1544 pr_info("Running postponed tracer tests:\n");
1545
1546 tracing_selftest_running = true;
1547 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1548 ret = run_tracer_selftest(p->type);
1549 /* If the test fails, then warn and remove from available_tracers */
1550 if (ret < 0) {
1551 WARN(1, "tracer: %s failed selftest, disabling\n",
1552 p->type->name);
1553 last = &trace_types;
1554 for (t = trace_types; t; t = t->next) {
1555 if (t == p->type) {
1556 *last = t->next;
1557 break;
1558 }
1559 last = &t->next;
1560 }
1561 }
1562 list_del(&p->list);
1563 kfree(p);
1564 }
1565 tracing_selftest_running = false;
1566
1567 out:
1568 mutex_unlock(&trace_types_lock);
1569
1570 return 0;
1571 }
1572 core_initcall(init_trace_selftests);
1573 #else
1574 static inline int run_tracer_selftest(struct tracer *type)
1575 {
1576 return 0;
1577 }
1578 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1579
1580 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1581
1582 static void __init apply_trace_boot_options(void);
1583
1584 /**
1585 * register_tracer - register a tracer with the ftrace system.
1586 * @type: the plugin for the tracer
1587 *
1588 * Register a new plugin tracer.
1589 */
1590 int __init register_tracer(struct tracer *type)
1591 {
1592 struct tracer *t;
1593 int ret = 0;
1594
1595 if (!type->name) {
1596 pr_info("Tracer must have a name\n");
1597 return -1;
1598 }
1599
1600 if (strlen(type->name) >= MAX_TRACER_SIZE) {
1601 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1602 return -1;
1603 }
1604
1605 mutex_lock(&trace_types_lock);
1606
1607 tracing_selftest_running = true;
1608
1609 for (t = trace_types; t; t = t->next) {
1610 if (strcmp(type->name, t->name) == 0) {
1611 /* already found */
1612 pr_info("Tracer %s already registered\n",
1613 type->name);
1614 ret = -1;
1615 goto out;
1616 }
1617 }
1618
1619 if (!type->set_flag)
1620 type->set_flag = &dummy_set_flag;
1621 if (!type->flags) {
1622 /* allocate a dummy tracer_flags */
1623 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1624 if (!type->flags) {
1625 ret = -ENOMEM;
1626 goto out;
1627 }
1628 type->flags->val = 0;
1629 type->flags->opts = dummy_tracer_opt;
1630 } else
1631 if (!type->flags->opts)
1632 type->flags->opts = dummy_tracer_opt;
1633
1634 /* store the tracer for __set_tracer_option */
1635 type->flags->trace = type;
1636
1637 ret = run_tracer_selftest(type);
1638 if (ret < 0)
1639 goto out;
1640
1641 type->next = trace_types;
1642 trace_types = type;
1643 add_tracer_options(&global_trace, type);
1644
1645 out:
1646 tracing_selftest_running = false;
1647 mutex_unlock(&trace_types_lock);
1648
1649 if (ret || !default_bootup_tracer)
1650 goto out_unlock;
1651
1652 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1653 goto out_unlock;
1654
1655 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1656 /* Do we want this tracer to start on bootup? */
1657 tracing_set_tracer(&global_trace, type->name);
1658 default_bootup_tracer = NULL;
1659
1660 apply_trace_boot_options();
1661
1662 /* disable other selftests, since this will break them. */
1663 tracing_selftest_disabled = true;
1664 #ifdef CONFIG_FTRACE_STARTUP_TEST
1665 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1666 type->name);
1667 #endif
1668
1669 out_unlock:
1670 return ret;
1671 }
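/*
 * Minimal sketch of registering a tracer plugin; the field set and
 * names are illustrative, not exhaustive:
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */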
1672
1673 void tracing_reset(struct trace_buffer *buf, int cpu)
1674 {
1675 struct ring_buffer *buffer = buf->buffer;
1676
1677 if (!buffer)
1678 return;
1679
1680 ring_buffer_record_disable(buffer);
1681
1682 /* Make sure all commits have finished */
1683 synchronize_sched();
1684 ring_buffer_reset_cpu(buffer, cpu);
1685
1686 ring_buffer_record_enable(buffer);
1687 }
1688
1689 void tracing_reset_online_cpus(struct trace_buffer *buf)
1690 {
1691 struct ring_buffer *buffer = buf->buffer;
1692 int cpu;
1693
1694 if (!buffer)
1695 return;
1696
1697 ring_buffer_record_disable(buffer);
1698
1699 /* Make sure all commits have finished */
1700 synchronize_sched();
1701
1702 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1703
1704 for_each_online_cpu(cpu)
1705 ring_buffer_reset_cpu(buffer, cpu);
1706
1707 ring_buffer_record_enable(buffer);
1708 }
1709
1710 /* Must have trace_types_lock held */
1711 void tracing_reset_all_online_cpus(void)
1712 {
1713 struct trace_array *tr;
1714
1715 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1716 if (!tr->clear_trace)
1717 continue;
1718 tr->clear_trace = false;
1719 tracing_reset_online_cpus(&tr->trace_buffer);
1720 #ifdef CONFIG_TRACER_MAX_TRACE
1721 tracing_reset_online_cpus(&tr->max_buffer);
1722 #endif
1723 }
1724 }
1725
1726 static int *tgid_map;
1727
1728 #define SAVED_CMDLINES_DEFAULT 128
1729 #define NO_CMDLINE_MAP UINT_MAX
1730 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1731 struct saved_cmdlines_buffer {
1732 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1733 unsigned *map_cmdline_to_pid;
1734 unsigned cmdline_num;
1735 int cmdline_idx;
1736 char *saved_cmdlines;
1737 };
1738 static struct saved_cmdlines_buffer *savedcmd;
1739
1740 /* temporarily disable recording */
1741 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1742
1743 static inline char *get_saved_cmdlines(int idx)
1744 {
1745 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1746 }
1747
1748 static inline void set_cmdline(int idx, const char *cmdline)
1749 {
1750 memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1751 }
1752
1753 static int allocate_cmdlines_buffer(unsigned int val,
1754 struct saved_cmdlines_buffer *s)
1755 {
1756 s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1757 GFP_KERNEL);
1758 if (!s->map_cmdline_to_pid)
1759 return -ENOMEM;
1760
1761 s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1762 if (!s->saved_cmdlines) {
1763 kfree(s->map_cmdline_to_pid);
1764 return -ENOMEM;
1765 }
1766
1767 s->cmdline_idx = 0;
1768 s->cmdline_num = val;
1769 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1770 sizeof(s->map_pid_to_cmdline));
1771 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1772 val * sizeof(*s->map_cmdline_to_pid));
1773
1774 return 0;
1775 }
1776
1777 static int trace_create_savedcmd(void)
1778 {
1779 int ret;
1780
1781 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1782 if (!savedcmd)
1783 return -ENOMEM;
1784
1785 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1786 if (ret < 0) {
1787 kfree(savedcmd);
1788 savedcmd = NULL;
1789 return -ENOMEM;
1790 }
1791
1792 return 0;
1793 }
1794
1795 int is_tracing_stopped(void)
1796 {
1797 return global_trace.stop_count;
1798 }
1799
1800 /**
1801 * tracing_start - quick start of the tracer
1802 *
1803 * If tracing is enabled but was stopped by tracing_stop,
1804 * this will start the tracer back up.
1805 */
1806 void tracing_start(void)
1807 {
1808 struct ring_buffer *buffer;
1809 unsigned long flags;
1810
1811 if (tracing_disabled)
1812 return;
1813
1814 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1815 if (--global_trace.stop_count) {
1816 if (global_trace.stop_count < 0) {
1817 /* Someone screwed up their debugging */
1818 WARN_ON_ONCE(1);
1819 global_trace.stop_count = 0;
1820 }
1821 goto out;
1822 }
1823
1824 /* Prevent the buffers from switching */
1825 arch_spin_lock(&global_trace.max_lock);
1826
1827 buffer = global_trace.trace_buffer.buffer;
1828 if (buffer)
1829 ring_buffer_record_enable(buffer);
1830
1831 #ifdef CONFIG_TRACER_MAX_TRACE
1832 buffer = global_trace.max_buffer.buffer;
1833 if (buffer)
1834 ring_buffer_record_enable(buffer);
1835 #endif
1836
1837 arch_spin_unlock(&global_trace.max_lock);
1838
1839 out:
1840 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1841 }
1842
1843 static void tracing_start_tr(struct trace_array *tr)
1844 {
1845 struct ring_buffer *buffer;
1846 unsigned long flags;
1847
1848 if (tracing_disabled)
1849 return;
1850
1851 /* If global, we need to also start the max tracer */
1852 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1853 return tracing_start();
1854
1855 raw_spin_lock_irqsave(&tr->start_lock, flags);
1856
1857 if (--tr->stop_count) {
1858 if (tr->stop_count < 0) {
1859 /* Someone screwed up their debugging */
1860 WARN_ON_ONCE(1);
1861 tr->stop_count = 0;
1862 }
1863 goto out;
1864 }
1865
1866 buffer = tr->trace_buffer.buffer;
1867 if (buffer)
1868 ring_buffer_record_enable(buffer);
1869
1870 out:
1871 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1872 }
1873
1874 /**
1875 * tracing_stop - quick stop of the tracer
1876 *
1877 * Light weight way to stop tracing. Use in conjunction with
1878 * tracing_start.
1879 */
1880 void tracing_stop(void)
1881 {
1882 struct ring_buffer *buffer;
1883 unsigned long flags;
1884
1885 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1886 if (global_trace.stop_count++)
1887 goto out;
1888
1889 /* Prevent the buffers from switching */
1890 arch_spin_lock(&global_trace.max_lock);
1891
1892 buffer = global_trace.trace_buffer.buffer;
1893 if (buffer)
1894 ring_buffer_record_disable(buffer);
1895
1896 #ifdef CONFIG_TRACER_MAX_TRACE
1897 buffer = global_trace.max_buffer.buffer;
1898 if (buffer)
1899 ring_buffer_record_disable(buffer);
1900 #endif
1901
1902 arch_spin_unlock(&global_trace.max_lock);
1903
1904 out:
1905 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1906 }
1907
1908 static void tracing_stop_tr(struct trace_array *tr)
1909 {
1910 struct ring_buffer *buffer;
1911 unsigned long flags;
1912
1913 /* If global, we need to also stop the max tracer */
1914 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1915 return tracing_stop();
1916
1917 raw_spin_lock_irqsave(&tr->start_lock, flags);
1918 if (tr->stop_count++)
1919 goto out;
1920
1921 buffer = tr->trace_buffer.buffer;
1922 if (buffer)
1923 ring_buffer_record_disable(buffer);
1924
1925 out:
1926 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1927 }
1928
1929 static int trace_save_cmdline(struct task_struct *tsk)
1930 {
1931 unsigned pid, idx;
1932
1933 /* treat recording of idle task as a success */
1934 if (!tsk->pid)
1935 return 1;
1936
1937 if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1938 return 0;
1939
1940 /*
1941 * It's not the end of the world if we don't get
1942 * the lock, but we also don't want to spin
1943 * nor do we want to disable interrupts,
1944 * so if we miss here, then better luck next time.
1945 */
1946 if (!arch_spin_trylock(&trace_cmdline_lock))
1947 return 0;
1948
1949 idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1950 if (idx == NO_CMDLINE_MAP) {
1951 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1952
1953 /*
1954 * Check whether the cmdline buffer at idx has a pid
1955 * mapped. We are going to overwrite that entry so we
1956 * need to clear the map_pid_to_cmdline. Otherwise we
1957 * would read the new comm for the old pid.
1958 */
1959 pid = savedcmd->map_cmdline_to_pid[idx];
1960 if (pid != NO_CMDLINE_MAP)
1961 savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1962
1963 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1964 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1965
1966 savedcmd->cmdline_idx = idx;
1967 }
1968
1969 set_cmdline(idx, tsk->comm);
1970
1971 arch_spin_unlock(&trace_cmdline_lock);
1972
1973 return 1;
1974 }
1975
1976 static void __trace_find_cmdline(int pid, char comm[])
1977 {
1978 unsigned map;
1979
1980 if (!pid) {
1981 strcpy(comm, "<idle>");
1982 return;
1983 }
1984
1985 if (WARN_ON_ONCE(pid < 0)) {
1986 strcpy(comm, "<XXX>");
1987 return;
1988 }
1989
1990 if (pid > PID_MAX_DEFAULT) {
1991 strcpy(comm, "<...>");
1992 return;
1993 }
1994
1995 map = savedcmd->map_pid_to_cmdline[pid];
1996 if (map != NO_CMDLINE_MAP)
1997 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1998 else
1999 strcpy(comm, "<...>");
2000 }
2001
2002 void trace_find_cmdline(int pid, char comm[])
2003 {
2004 preempt_disable();
2005 arch_spin_lock(&trace_cmdline_lock);
2006
2007 __trace_find_cmdline(pid, comm);
2008
2009 arch_spin_unlock(&trace_cmdline_lock);
2010 preempt_enable();
2011 }
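
/*
 * Illustrative sketch (not from the original source): a print routine that
 * wants the comm for a recorded pid would typically do
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%16s-%-5d", comm, entry->pid);
 *
 * trace_find_cmdline() always fills @comm, falling back to "<...>" when the
 * pid has aged out of the saved cmdline cache.
 */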
2012
2013 int trace_find_tgid(int pid)
2014 {
2015 if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2016 return 0;
2017
2018 return tgid_map[pid];
2019 }
2020
2021 static int trace_save_tgid(struct task_struct *tsk)
2022 {
2023 /* treat recording of idle task as a success */
2024 if (!tsk->pid)
2025 return 1;
2026
2027 if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2028 return 0;
2029
2030 tgid_map[tsk->pid] = tsk->tgid;
2031 return 1;
2032 }
2033
2034 static bool tracing_record_taskinfo_skip(int flags)
2035 {
2036 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2037 return true;
2038 if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2039 return true;
2040 if (!__this_cpu_read(trace_taskinfo_save))
2041 return true;
2042 return false;
2043 }
2044
2045 /**
2046 * tracing_record_taskinfo - record the task info of a task
2047 *
2048  * @task: task to record
2049  * @flags: TRACE_RECORD_CMDLINE for recording comm,
2050  *         TRACE_RECORD_TGID for recording tgid
2051 */
2052 void tracing_record_taskinfo(struct task_struct *task, int flags)
2053 {
2054 bool done;
2055
2056 if (tracing_record_taskinfo_skip(flags))
2057 return;
2058
2059 /*
2060 * Record as much task information as possible. If some fail, continue
2061 * to try to record the others.
2062 */
2063 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2064 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2065
2066 /* If recording any information failed, retry again soon. */
2067 if (!done)
2068 return;
2069
2070 __this_cpu_write(trace_taskinfo_save, false);
2071 }
2072
2073 /**
2074 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2075 *
2076  * @prev: previous task during sched_switch
2077  * @next: next task during sched_switch
2078  * @flags: TRACE_RECORD_CMDLINE for recording comm,
2079  *         TRACE_RECORD_TGID for recording tgid
2080 */
2081 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2082 struct task_struct *next, int flags)
2083 {
2084 bool done;
2085
2086 if (tracing_record_taskinfo_skip(flags))
2087 return;
2088
2089 /*
2090 * Record as much task information as possible. If some fail, continue
2091 * to try to record the others.
2092 */
2093 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2094 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2095 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2096 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2097
2098 /* If recording any information failed, retry again soon. */
2099 if (!done)
2100 return;
2101
2102 __this_cpu_write(trace_taskinfo_save, false);
2103 }
2104
2105 /* Helpers to record a specific task information */
2106 void tracing_record_cmdline(struct task_struct *task)
2107 {
2108 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2109 }
2110
2111 void tracing_record_tgid(struct task_struct *task)
2112 {
2113 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2114 }
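
/*
 * Illustrative sketch (assumption about the caller, which lives outside this
 * file): the sched_switch probe is expected to invoke the helper above
 * roughly as
 *
 *	tracing_record_taskinfo_sched_switch(prev, next,
 *					     TRACE_RECORD_CMDLINE |
 *					     TRACE_RECORD_TGID);
 *
 * so that both comms and tgids are cached while a tracer is active.
 */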
2115
2116 /*
2117 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2118 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2119 * simplifies those functions and keeps them in sync.
2120 */
2121 enum print_line_t trace_handle_return(struct trace_seq *s)
2122 {
2123 return trace_seq_has_overflowed(s) ?
2124 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2125 }
2126 EXPORT_SYMBOL_GPL(trace_handle_return);
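
/*
 * Illustrative sketch (hypothetical event callback, not from this file):
 * output routines collapse the overflow check into their return value with
 * this helper:
 *
 *	static enum print_line_t
 *	trace_foo_print(struct trace_iterator *iter, int flags,
 *			struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "foo: %d\n", 42);
 *		return trace_handle_return(&iter->seq);
 *	}
 */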
2127
2128 void
2129 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2130 int pc)
2131 {
2132 struct task_struct *tsk = current;
2133
2134 entry->preempt_count = pc & 0xff;
2135 entry->pid = (tsk) ? tsk->pid : 0;
2136 entry->flags =
2137 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2138 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2139 #else
2140 TRACE_FLAG_IRQS_NOSUPPORT |
2141 #endif
2142 ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
2143 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2144 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2145 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2146 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2147 }
2148 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2149
2150 struct ring_buffer_event *
2151 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2152 int type,
2153 unsigned long len,
2154 unsigned long flags, int pc)
2155 {
2156 return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2157 }
2158
2159 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2160 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2161 static int trace_buffered_event_ref;
2162
2163 /**
2164 * trace_buffered_event_enable - enable buffering events
2165 *
2166 * When events are being filtered, it is quicker to use a temporary
2167 * buffer to write the event data into if there's a likely chance
2168 * that it will not be committed. The discard of the ring buffer
2169 * is not as fast as committing, and is much slower than copying
2170 * a commit.
2171 *
2172 * When an event is to be filtered, allocate per cpu buffers to
2173 * write the event data into, and if the event is filtered and discarded
2174 * it is simply dropped, otherwise, the entire data is to be committed
2175 * in one shot.
2176 */
2177 void trace_buffered_event_enable(void)
2178 {
2179 struct ring_buffer_event *event;
2180 struct page *page;
2181 int cpu;
2182
2183 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2184
2185 if (trace_buffered_event_ref++)
2186 return;
2187
2188 for_each_tracing_cpu(cpu) {
2189 page = alloc_pages_node(cpu_to_node(cpu),
2190 GFP_KERNEL | __GFP_NORETRY, 0);
2191 if (!page)
2192 goto failed;
2193
2194 event = page_address(page);
2195 memset(event, 0, sizeof(*event));
2196
2197 per_cpu(trace_buffered_event, cpu) = event;
2198
2199 preempt_disable();
2200 if (cpu == smp_processor_id() &&
2201 this_cpu_read(trace_buffered_event) !=
2202 per_cpu(trace_buffered_event, cpu))
2203 WARN_ON_ONCE(1);
2204 preempt_enable();
2205 }
2206
2207 return;
2208 failed:
2209 trace_buffered_event_disable();
2210 }
2211
2212 static void enable_trace_buffered_event(void *data)
2213 {
2214 /* Probably not needed, but do it anyway */
2215 smp_rmb();
2216 this_cpu_dec(trace_buffered_event_cnt);
2217 }
2218
2219 static void disable_trace_buffered_event(void *data)
2220 {
2221 this_cpu_inc(trace_buffered_event_cnt);
2222 }
2223
2224 /**
2225 * trace_buffered_event_disable - disable buffering events
2226 *
2227 * When a filter is removed, it is faster to not use the buffered
2228 * events, and to commit directly into the ring buffer. Free up
2229 * the temp buffers when there are no more users. This requires
2230 * special synchronization with current events.
2231 */
2232 void trace_buffered_event_disable(void)
2233 {
2234 int cpu;
2235
2236 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2237
2238 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2239 return;
2240
2241 if (--trace_buffered_event_ref)
2242 return;
2243
2244 preempt_disable();
2245 /* For each CPU, set the buffer as used. */
2246 smp_call_function_many(tracing_buffer_mask,
2247 disable_trace_buffered_event, NULL, 1);
2248 preempt_enable();
2249
2250 /* Wait for all current users to finish */
2251 synchronize_sched();
2252
2253 for_each_tracing_cpu(cpu) {
2254 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2255 per_cpu(trace_buffered_event, cpu) = NULL;
2256 }
2257 /*
2258 * Make sure trace_buffered_event is NULL before clearing
2259 * trace_buffered_event_cnt.
2260 */
2261 smp_wmb();
2262
2263 preempt_disable();
2264 /* Do the work on each cpu */
2265 smp_call_function_many(tracing_buffer_mask,
2266 enable_trace_buffered_event, NULL, 1);
2267 preempt_enable();
2268 }
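
/*
 * Illustrative sketch (assumption about the callers in the event filter
 * code): the enable/disable pair above is used symmetrically, under
 * event_mutex, around the lifetime of a filter:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	...install the filter on the event file...
 *	mutex_unlock(&event_mutex);
 *
 * with a matching trace_buffered_event_disable() when the filter is removed,
 * so the per-cpu pages only exist while filtering is in use.
 */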
2269
2270 static struct ring_buffer *temp_buffer;
2271
2272 struct ring_buffer_event *
2273 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2274 struct trace_event_file *trace_file,
2275 int type, unsigned long len,
2276 unsigned long flags, int pc)
2277 {
2278 struct ring_buffer_event *entry;
2279 int val;
2280
2281 *current_rb = trace_file->tr->trace_buffer.buffer;
2282
2283 if ((trace_file->flags &
2284 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2285 (entry = this_cpu_read(trace_buffered_event))) {
2286 /* Try to use the per cpu buffer first */
2287 val = this_cpu_inc_return(trace_buffered_event_cnt);
2288 if (val == 1) {
2289 trace_event_setup(entry, type, flags, pc);
2290 entry->array[0] = len;
2291 return entry;
2292 }
2293 this_cpu_dec(trace_buffered_event_cnt);
2294 }
2295
2296 entry = __trace_buffer_lock_reserve(*current_rb,
2297 type, len, flags, pc);
2298 /*
2299 * If tracing is off, but we have triggers enabled
2300 * we still need to look at the event data. Use the temp_buffer
2301 	 * to store the trace event for the trigger to use. It's recursion
2302 	 * safe and will not be recorded anywhere.
2303 */
2304 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2305 *current_rb = temp_buffer;
2306 entry = __trace_buffer_lock_reserve(*current_rb,
2307 type, len, flags, pc);
2308 }
2309 return entry;
2310 }
2311 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2312
2313 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2314 static DEFINE_MUTEX(tracepoint_printk_mutex);
2315
2316 static void output_printk(struct trace_event_buffer *fbuffer)
2317 {
2318 struct trace_event_call *event_call;
2319 struct trace_event *event;
2320 unsigned long flags;
2321 struct trace_iterator *iter = tracepoint_print_iter;
2322
2323 /* We should never get here if iter is NULL */
2324 if (WARN_ON_ONCE(!iter))
2325 return;
2326
2327 event_call = fbuffer->trace_file->event_call;
2328 if (!event_call || !event_call->event.funcs ||
2329 !event_call->event.funcs->trace)
2330 return;
2331
2332 event = &fbuffer->trace_file->event_call->event;
2333
2334 spin_lock_irqsave(&tracepoint_iter_lock, flags);
2335 trace_seq_init(&iter->seq);
2336 iter->ent = fbuffer->entry;
2337 event_call->event.funcs->trace(iter, 0, event);
2338 trace_seq_putc(&iter->seq, 0);
2339 printk("%s", iter->seq.buffer);
2340
2341 spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2342 }
2343
2344 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2345 void __user *buffer, size_t *lenp,
2346 loff_t *ppos)
2347 {
2348 int save_tracepoint_printk;
2349 int ret;
2350
2351 mutex_lock(&tracepoint_printk_mutex);
2352 save_tracepoint_printk = tracepoint_printk;
2353
2354 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2355
2356 /*
2357 * This will force exiting early, as tracepoint_printk
2358 	 * is always zero when tracepoint_print_iter is not allocated
2359 */
2360 if (!tracepoint_print_iter)
2361 tracepoint_printk = 0;
2362
2363 if (save_tracepoint_printk == tracepoint_printk)
2364 goto out;
2365
2366 if (tracepoint_printk)
2367 static_key_enable(&tracepoint_printk_key.key);
2368 else
2369 static_key_disable(&tracepoint_printk_key.key);
2370
2371 out:
2372 mutex_unlock(&tracepoint_printk_mutex);
2373
2374 return ret;
2375 }
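
/*
 * Illustrative note (assumption about the sysctl wiring, which lives in
 * kernel/sysctl.c): this handler backs the "kernel.tracepoint_printk"
 * sysctl, so the static key can be flipped at run time with, e.g.,
 *
 *	echo 1 > /proc/sys/kernel/tracepoint_printk
 *
 * which only has an effect when tracepoint_print_iter was allocated at boot
 * via the "tp_printk" command line option.
 */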
2376
2377 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2378 {
2379 if (static_key_false(&tracepoint_printk_key.key))
2380 output_printk(fbuffer);
2381
2382 event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2383 fbuffer->event, fbuffer->entry,
2384 fbuffer->flags, fbuffer->pc);
2385 }
2386 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2387
2388 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2389 struct ring_buffer *buffer,
2390 struct ring_buffer_event *event,
2391 unsigned long flags, int pc,
2392 struct pt_regs *regs)
2393 {
2394 __buffer_unlock_commit(buffer, event);
2395
2396 /*
2397 * If regs is not set, then skip the following callers:
2398 * trace_buffer_unlock_commit_regs
2399 * event_trigger_unlock_commit
2400 * trace_event_buffer_commit
2401 * trace_event_raw_event_sched_switch
2402 * Note, we can still get here via blktrace, wakeup tracer
2403 * and mmiotrace, but that's ok if they lose a function or
2404 	 * two. They are not that meaningful.
2405 */
2406 ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2407 ftrace_trace_userstack(buffer, flags, pc);
2408 }
2409
2410 /*
2411 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2412 */
2413 void
2414 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2415 struct ring_buffer_event *event)
2416 {
2417 __buffer_unlock_commit(buffer, event);
2418 }
2419
2420 static void
2421 trace_process_export(struct trace_export *export,
2422 struct ring_buffer_event *event)
2423 {
2424 struct trace_entry *entry;
2425 unsigned int size = 0;
2426
2427 entry = ring_buffer_event_data(event);
2428 size = ring_buffer_event_length(event);
2429 export->write(entry, size);
2430 }
2431
2432 static DEFINE_MUTEX(ftrace_export_lock);
2433
2434 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2435
2436 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2437
2438 static inline void ftrace_exports_enable(void)
2439 {
2440 static_branch_enable(&ftrace_exports_enabled);
2441 }
2442
2443 static inline void ftrace_exports_disable(void)
2444 {
2445 static_branch_disable(&ftrace_exports_enabled);
2446 }
2447
2448 void ftrace_exports(struct ring_buffer_event *event)
2449 {
2450 struct trace_export *export;
2451
2452 preempt_disable_notrace();
2453
2454 export = rcu_dereference_raw_notrace(ftrace_exports_list);
2455 while (export) {
2456 trace_process_export(export, event);
2457 export = rcu_dereference_raw_notrace(export->next);
2458 }
2459
2460 preempt_enable_notrace();
2461 }
2462
2463 static inline void
2464 add_trace_export(struct trace_export **list, struct trace_export *export)
2465 {
2466 rcu_assign_pointer(export->next, *list);
2467 /*
2468 	 * We are adding export to the list, but another
2469 	 * CPU might be walking that list. We need to make sure
2470 	 * the export->next pointer is valid before another CPU sees
2471 	 * the export pointer included in the list.
2472 */
2473 rcu_assign_pointer(*list, export);
2474 }
2475
2476 static inline int
2477 rm_trace_export(struct trace_export **list, struct trace_export *export)
2478 {
2479 struct trace_export **p;
2480
2481 for (p = list; *p != NULL; p = &(*p)->next)
2482 if (*p == export)
2483 break;
2484
2485 if (*p != export)
2486 return -1;
2487
2488 rcu_assign_pointer(*p, (*p)->next);
2489
2490 return 0;
2491 }
2492
2493 static inline void
2494 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2495 {
2496 if (*list == NULL)
2497 ftrace_exports_enable();
2498
2499 add_trace_export(list, export);
2500 }
2501
2502 static inline int
2503 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2504 {
2505 int ret;
2506
2507 ret = rm_trace_export(list, export);
2508 if (*list == NULL)
2509 ftrace_exports_disable();
2510
2511 return ret;
2512 }
2513
2514 int register_ftrace_export(struct trace_export *export)
2515 {
2516 if (WARN_ON_ONCE(!export->write))
2517 return -1;
2518
2519 mutex_lock(&ftrace_export_lock);
2520
2521 add_ftrace_export(&ftrace_exports_list, export);
2522
2523 mutex_unlock(&ftrace_export_lock);
2524
2525 return 0;
2526 }
2527 EXPORT_SYMBOL_GPL(register_ftrace_export);
2528
2529 int unregister_ftrace_export(struct trace_export *export)
2530 {
2531 int ret;
2532
2533 mutex_lock(&ftrace_export_lock);
2534
2535 ret = rm_ftrace_export(&ftrace_exports_list, export);
2536
2537 mutex_unlock(&ftrace_export_lock);
2538
2539 return ret;
2540 }
2541 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
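
/*
 * Illustrative sketch (hypothetical client, not from this file): a module
 * that wants raw function-trace records pushed to it registers a
 * struct trace_export whose write() callback matches the call made in
 * trace_process_export() above:
 *
 *	static void my_export_write(const void *buf, unsigned int len)
 *	{
 *		...forward the raw trace entry, e.g. to an STM device...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */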
2542
2543 void
2544 trace_function(struct trace_array *tr,
2545 unsigned long ip, unsigned long parent_ip, unsigned long flags,
2546 int pc)
2547 {
2548 struct trace_event_call *call = &event_function;
2549 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2550 struct ring_buffer_event *event;
2551 struct ftrace_entry *entry;
2552
2553 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2554 flags, pc);
2555 if (!event)
2556 return;
2557 entry = ring_buffer_event_data(event);
2558 entry->ip = ip;
2559 entry->parent_ip = parent_ip;
2560
2561 if (!call_filter_check_discard(call, entry, buffer, event)) {
2562 if (static_branch_unlikely(&ftrace_exports_enabled))
2563 ftrace_exports(event);
2564 __buffer_unlock_commit(buffer, event);
2565 }
2566 }
2567
2568 #ifdef CONFIG_STACKTRACE
2569
2570 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2571 struct ftrace_stack {
2572 unsigned long calls[FTRACE_STACK_MAX_ENTRIES];
2573 };
2574
2575 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2576 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2577
2578 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2579 unsigned long flags,
2580 int skip, int pc, struct pt_regs *regs)
2581 {
2582 struct trace_event_call *call = &event_kernel_stack;
2583 struct ring_buffer_event *event;
2584 struct stack_entry *entry;
2585 struct stack_trace trace;
2586 int use_stack;
2587 int size = FTRACE_STACK_ENTRIES;
2588
2589 trace.nr_entries = 0;
2590 trace.skip = skip;
2591
2592 /*
2593 * Add two, for this function and the call to save_stack_trace()
2594 * If regs is set, then these functions will not be in the way.
2595 */
2596 if (!regs)
2597 trace.skip += 2;
2598
2599 /*
2600 * Since events can happen in NMIs there's no safe way to
2601 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2602 * or NMI comes in, it will just have to use the default
2603 	 * FTRACE_STACK_ENTRIES.
2604 */
2605 preempt_disable_notrace();
2606
2607 use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2608 /*
2609 * We don't need any atomic variables, just a barrier.
2610 * If an interrupt comes in, we don't care, because it would
2611 * have exited and put the counter back to what we want.
2612 * We just need a barrier to keep gcc from moving things
2613 * around.
2614 */
2615 barrier();
2616 if (use_stack == 1) {
2617 trace.entries = this_cpu_ptr(ftrace_stack.calls);
2618 trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
2619
2620 if (regs)
2621 save_stack_trace_regs(regs, &trace);
2622 else
2623 save_stack_trace(&trace);
2624
2625 if (trace.nr_entries > size)
2626 size = trace.nr_entries;
2627 } else
2628 /* From now on, use_stack is a boolean */
2629 use_stack = 0;
2630
2631 size *= sizeof(unsigned long);
2632
2633 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2634 sizeof(*entry) + size, flags, pc);
2635 if (!event)
2636 goto out;
2637 entry = ring_buffer_event_data(event);
2638
2639 memset(&entry->caller, 0, size);
2640
2641 if (use_stack)
2642 memcpy(&entry->caller, trace.entries,
2643 trace.nr_entries * sizeof(unsigned long));
2644 else {
2645 trace.max_entries = FTRACE_STACK_ENTRIES;
2646 trace.entries = entry->caller;
2647 if (regs)
2648 save_stack_trace_regs(regs, &trace);
2649 else
2650 save_stack_trace(&trace);
2651 }
2652
2653 entry->size = trace.nr_entries;
2654
2655 if (!call_filter_check_discard(call, entry, buffer, event))
2656 __buffer_unlock_commit(buffer, event);
2657
2658 out:
2659 /* Again, don't let gcc optimize things here */
2660 barrier();
2661 __this_cpu_dec(ftrace_stack_reserve);
2662 preempt_enable_notrace();
2663
2664 }
2665
2666 static inline void ftrace_trace_stack(struct trace_array *tr,
2667 struct ring_buffer *buffer,
2668 unsigned long flags,
2669 int skip, int pc, struct pt_regs *regs)
2670 {
2671 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2672 return;
2673
2674 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2675 }
2676
2677 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2678 int pc)
2679 {
2680 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2681
2682 if (rcu_is_watching()) {
2683 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2684 return;
2685 }
2686
2687 /*
2688 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2689 * but if the above rcu_is_watching() failed, then the NMI
2690 * triggered someplace critical, and rcu_irq_enter() should
2691 * not be called from NMI.
2692 */
2693 if (unlikely(in_nmi()))
2694 return;
2695
2696 /*
2697 * It is possible that a function is being traced in a
2698 * location that RCU is not watching. A call to
2699 * rcu_irq_enter() will make sure that it is, but there's
2700 * a few internal rcu functions that could be traced
2701 * where that wont work either. In those cases, we just
2702 * do nothing.
2703 */
2704 if (unlikely(rcu_irq_enter_disabled()))
2705 return;
2706
2707 rcu_irq_enter_irqson();
2708 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2709 rcu_irq_exit_irqson();
2710 }
2711
2712 /**
2713 * trace_dump_stack - record a stack back trace in the trace buffer
2714 * @skip: Number of functions to skip (helper handlers)
2715 */
2716 void trace_dump_stack(int skip)
2717 {
2718 unsigned long flags;
2719
2720 if (tracing_disabled || tracing_selftest_running)
2721 return;
2722
2723 local_save_flags(flags);
2724
2725 /*
2726 	 * Skip 3 more, as that seems to get us to the caller of
2727 	 * this function.
2728 */
2729 skip += 3;
2730 __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2731 flags, skip, preempt_count(), NULL);
2732 }
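
/*
 * Illustrative sketch (hypothetical call site): a driver chasing a rare code
 * path can drop a kernel back trace into the ring buffer from the suspect
 * location without halting the machine:
 *
 *	if (unlikely(hit_bad_state))
 *		trace_dump_stack(0);
 *
 * A @skip of 0 starts the dump at the immediate caller; pass a larger value
 * to hide wrapper functions.
 */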
2733
2734 static DEFINE_PER_CPU(int, user_stack_count);
2735
2736 void
2737 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2738 {
2739 struct trace_event_call *call = &event_user_stack;
2740 struct ring_buffer_event *event;
2741 struct userstack_entry *entry;
2742 struct stack_trace trace;
2743
2744 if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2745 return;
2746
2747 /*
2748 	 * NMIs cannot handle page faults, even with fixups.
2749 	 * Saving the user stack can (and often does) fault.
2750 */
2751 if (unlikely(in_nmi()))
2752 return;
2753
2754 /*
2755 * prevent recursion, since the user stack tracing may
2756 * trigger other kernel events.
2757 */
2758 preempt_disable();
2759 if (__this_cpu_read(user_stack_count))
2760 goto out;
2761
2762 __this_cpu_inc(user_stack_count);
2763
2764 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2765 sizeof(*entry), flags, pc);
2766 if (!event)
2767 goto out_drop_count;
2768 entry = ring_buffer_event_data(event);
2769
2770 entry->tgid = current->tgid;
2771 memset(&entry->caller, 0, sizeof(entry->caller));
2772
2773 trace.nr_entries = 0;
2774 trace.max_entries = FTRACE_STACK_ENTRIES;
2775 trace.skip = 0;
2776 trace.entries = entry->caller;
2777
2778 save_stack_trace_user(&trace);
2779 if (!call_filter_check_discard(call, entry, buffer, event))
2780 __buffer_unlock_commit(buffer, event);
2781
2782 out_drop_count:
2783 __this_cpu_dec(user_stack_count);
2784 out:
2785 preempt_enable();
2786 }
2787
2788 #ifdef UNUSED
2789 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2790 {
2791 ftrace_trace_userstack(tr, flags, preempt_count());
2792 }
2793 #endif /* UNUSED */
2794
2795 #endif /* CONFIG_STACKTRACE */
2796
2797 /* created for use with alloc_percpu */
2798 struct trace_buffer_struct {
2799 int nesting;
2800 char buffer[4][TRACE_BUF_SIZE];
2801 };
2802
2803 static struct trace_buffer_struct *trace_percpu_buffer;
2804
2805 /*
2806  * This allows for lockless recording. If we're nested too deeply, then
2807 * this returns NULL.
2808 */
2809 static char *get_trace_buf(void)
2810 {
2811 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2812
2813 if (!buffer || buffer->nesting >= 4)
2814 return NULL;
2815
2816 buffer->nesting++;
2817
2818 /* Interrupts must see nesting incremented before we use the buffer */
2819 barrier();
2820 return &buffer->buffer[buffer->nesting][0];
2821 }
2822
2823 static void put_trace_buf(void)
2824 {
2825 /* Don't let the decrement of nesting leak before this */
2826 barrier();
2827 this_cpu_dec(trace_percpu_buffer->nesting);
2828 }
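
/*
 * Illustrative sketch of the expected pairing (see trace_vbprintk() below
 * for the real users): callers must tolerate a NULL buffer when the four
 * nesting levels (task, softirq, irq, NMI) are exhausted:
 *
 *	char *tbuffer = get_trace_buf();
 *
 *	if (tbuffer) {
 *		...format at most TRACE_BUF_SIZE bytes into tbuffer...
 *		put_trace_buf();
 *	}
 */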
2829
2830 static int alloc_percpu_trace_buffer(void)
2831 {
2832 struct trace_buffer_struct *buffers;
2833
2834 buffers = alloc_percpu(struct trace_buffer_struct);
2835 if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2836 return -ENOMEM;
2837
2838 trace_percpu_buffer = buffers;
2839 return 0;
2840 }
2841
2842 static int buffers_allocated;
2843
2844 void trace_printk_init_buffers(void)
2845 {
2846 if (buffers_allocated)
2847 return;
2848
2849 if (alloc_percpu_trace_buffer())
2850 return;
2851
2852 /* trace_printk() is for debug use only. Don't use it in production. */
2853
2854 pr_warn("\n");
2855 pr_warn("**********************************************************\n");
2856 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2857 pr_warn("** **\n");
2858 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
2859 pr_warn("** **\n");
2860 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
2861 pr_warn("** unsafe for production use. **\n");
2862 pr_warn("** **\n");
2863 pr_warn("** If you see this message and you are not debugging **\n");
2864 pr_warn("** the kernel, report this immediately to your vendor! **\n");
2865 pr_warn("** **\n");
2866 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2867 pr_warn("**********************************************************\n");
2868
2869 /* Expand the buffers to set size */
2870 tracing_update_buffers();
2871
2872 buffers_allocated = 1;
2873
2874 /*
2875 * trace_printk_init_buffers() can be called by modules.
2876 * If that happens, then we need to start cmdline recording
2877 * directly here. If the global_trace.buffer is already
2878 * allocated here, then this was called by module code.
2879 */
2880 if (global_trace.trace_buffer.buffer)
2881 tracing_start_cmdline_record();
2882 }
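
/*
 * Illustrative sketch (debug-only usage, see the banner above): once the
 * buffers exist a developer can sprinkle
 *
 *	trace_printk("resetting queue %d, state=%d\n", qid, state);
 *
 * into a code path under test; the text lands in the ring buffer (read via
 * the "trace" file in tracefs) rather than the printk log, which keeps the
 * timing disturbance small. "qid" and "state" are placeholders.
 */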
2883
2884 void trace_printk_start_comm(void)
2885 {
2886 /* Start tracing comms if trace printk is set */
2887 if (!buffers_allocated)
2888 return;
2889 tracing_start_cmdline_record();
2890 }
2891
2892 static void trace_printk_start_stop_comm(int enabled)
2893 {
2894 if (!buffers_allocated)
2895 return;
2896
2897 if (enabled)
2898 tracing_start_cmdline_record();
2899 else
2900 tracing_stop_cmdline_record();
2901 }
2902
2903 /**
2904 * trace_vbprintk - write binary msg to tracing buffer
2905  * @ip: the address of the caller
2906  * @fmt: the binary printf format string, with its arguments in @args
 */
2907 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2908 {
2909 struct trace_event_call *call = &event_bprint;
2910 struct ring_buffer_event *event;
2911 struct ring_buffer *buffer;
2912 struct trace_array *tr = &global_trace;
2913 struct bprint_entry *entry;
2914 unsigned long flags;
2915 char *tbuffer;
2916 int len = 0, size, pc;
2917
2918 if (unlikely(tracing_selftest_running || tracing_disabled))
2919 return 0;
2920
2921 /* Don't pollute graph traces with trace_vprintk internals */
2922 pause_graph_tracing();
2923
2924 pc = preempt_count();
2925 preempt_disable_notrace();
2926
2927 tbuffer = get_trace_buf();
2928 if (!tbuffer) {
2929 len = 0;
2930 goto out_nobuffer;
2931 }
2932
2933 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2934
2935 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2936 goto out;
2937
2938 local_save_flags(flags);
2939 size = sizeof(*entry) + sizeof(u32) * len;
2940 buffer = tr->trace_buffer.buffer;
2941 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2942 flags, pc);
2943 if (!event)
2944 goto out;
2945 entry = ring_buffer_event_data(event);
2946 entry->ip = ip;
2947 entry->fmt = fmt;
2948
2949 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2950 if (!call_filter_check_discard(call, entry, buffer, event)) {
2951 __buffer_unlock_commit(buffer, event);
2952 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2953 }
2954
2955 out:
2956 put_trace_buf();
2957
2958 out_nobuffer:
2959 preempt_enable_notrace();
2960 unpause_graph_tracing();
2961
2962 return len;
2963 }
2964 EXPORT_SYMBOL_GPL(trace_vbprintk);
2965
2966 __printf(3, 0)
2967 static int
2968 __trace_array_vprintk(struct ring_buffer *buffer,
2969 unsigned long ip, const char *fmt, va_list args)
2970 {
2971 struct trace_event_call *call = &event_print;
2972 struct ring_buffer_event *event;
2973 int len = 0, size, pc;
2974 struct print_entry *entry;
2975 unsigned long flags;
2976 char *tbuffer;
2977
2978 if (tracing_disabled || tracing_selftest_running)
2979 return 0;
2980
2981 /* Don't pollute graph traces with trace_vprintk internals */
2982 pause_graph_tracing();
2983
2984 pc = preempt_count();
2985 preempt_disable_notrace();
2986
2987
2988 tbuffer = get_trace_buf();
2989 if (!tbuffer) {
2990 len = 0;
2991 goto out_nobuffer;
2992 }
2993
2994 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2995
2996 local_save_flags(flags);
2997 size = sizeof(*entry) + len + 1;
2998 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2999 flags, pc);
3000 if (!event)
3001 goto out;
3002 entry = ring_buffer_event_data(event);
3003 entry->ip = ip;
3004
3005 memcpy(&entry->buf, tbuffer, len + 1);
3006 if (!call_filter_check_discard(call, entry, buffer, event)) {
3007 __buffer_unlock_commit(buffer, event);
3008 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3009 }
3010
3011 out:
3012 put_trace_buf();
3013
3014 out_nobuffer:
3015 preempt_enable_notrace();
3016 unpause_graph_tracing();
3017
3018 return len;
3019 }
3020
3021 __printf(3, 0)
3022 int trace_array_vprintk(struct trace_array *tr,
3023 unsigned long ip, const char *fmt, va_list args)
3024 {
3025 return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3026 }
3027
3028 __printf(3, 0)
3029 int trace_array_printk(struct trace_array *tr,
3030 unsigned long ip, const char *fmt, ...)
3031 {
3032 int ret;
3033 va_list ap;
3034
3035 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3036 return 0;
3037
3038 va_start(ap, fmt);
3039 ret = trace_array_vprintk(tr, ip, fmt, ap);
3040 va_end(ap);
3041 return ret;
3042 }
3043
3044 __printf(3, 4)
3045 int trace_array_printk_buf(struct ring_buffer *buffer,
3046 unsigned long ip, const char *fmt, ...)
3047 {
3048 int ret;
3049 va_list ap;
3050
3051 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3052 return 0;
3053
3054 va_start(ap, fmt);
3055 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3056 va_end(ap);
3057 return ret;
3058 }
3059
3060 __printf(2, 0)
3061 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3062 {
3063 return trace_array_vprintk(&global_trace, ip, fmt, args);
3064 }
3065 EXPORT_SYMBOL_GPL(trace_vprintk);
3066
3067 static void trace_iterator_increment(struct trace_iterator *iter)
3068 {
3069 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3070
3071 iter->idx++;
3072 if (buf_iter)
3073 ring_buffer_read(buf_iter, NULL);
3074 }
3075
3076 static struct trace_entry *
3077 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3078 unsigned long *lost_events)
3079 {
3080 struct ring_buffer_event *event;
3081 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3082
3083 if (buf_iter)
3084 event = ring_buffer_iter_peek(buf_iter, ts);
3085 else
3086 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3087 lost_events);
3088
3089 if (event) {
3090 iter->ent_size = ring_buffer_event_length(event);
3091 return ring_buffer_event_data(event);
3092 }
3093 iter->ent_size = 0;
3094 return NULL;
3095 }
3096
3097 static struct trace_entry *
3098 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3099 unsigned long *missing_events, u64 *ent_ts)
3100 {
3101 struct ring_buffer *buffer = iter->trace_buffer->buffer;
3102 struct trace_entry *ent, *next = NULL;
3103 unsigned long lost_events = 0, next_lost = 0;
3104 int cpu_file = iter->cpu_file;
3105 u64 next_ts = 0, ts;
3106 int next_cpu = -1;
3107 int next_size = 0;
3108 int cpu;
3109
3110 /*
3111 	 * If we are in a per_cpu trace file, don't bother iterating over
3112 	 * all cpus; just peek at that cpu directly.
3113 */
3114 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3115 if (ring_buffer_empty_cpu(buffer, cpu_file))
3116 return NULL;
3117 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3118 if (ent_cpu)
3119 *ent_cpu = cpu_file;
3120
3121 return ent;
3122 }
3123
3124 for_each_tracing_cpu(cpu) {
3125
3126 if (ring_buffer_empty_cpu(buffer, cpu))
3127 continue;
3128
3129 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3130
3131 /*
3132 * Pick the entry with the smallest timestamp:
3133 */
3134 if (ent && (!next || ts < next_ts)) {
3135 next = ent;
3136 next_cpu = cpu;
3137 next_ts = ts;
3138 next_lost = lost_events;
3139 next_size = iter->ent_size;
3140 }
3141 }
3142
3143 iter->ent_size = next_size;
3144
3145 if (ent_cpu)
3146 *ent_cpu = next_cpu;
3147
3148 if (ent_ts)
3149 *ent_ts = next_ts;
3150
3151 if (missing_events)
3152 *missing_events = next_lost;
3153
3154 return next;
3155 }
3156
3157 /* Find the next real entry, without updating the iterator itself */
3158 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3159 int *ent_cpu, u64 *ent_ts)
3160 {
3161 return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3162 }
3163
3164 /* Find the next real entry, and increment the iterator to the next entry */
3165 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3166 {
3167 iter->ent = __find_next_entry(iter, &iter->cpu,
3168 &iter->lost_events, &iter->ts);
3169
3170 if (iter->ent)
3171 trace_iterator_increment(iter);
3172
3173 return iter->ent ? iter : NULL;
3174 }
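
/*
 * Illustrative sketch (assumption): a consumer that wants the merged,
 * time-ordered stream of every cpu buffer loops on the helper above, which
 * is essentially what the trace_pipe read path does:
 *
 *	while (trace_find_next_entry_inc(iter)) {
 *		print_trace_line(iter);
 *		trace_consume(iter);
 *	}
 *
 * with trace_consume() only used for destructive (pipe style) reads.
 */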
3175
3176 static void trace_consume(struct trace_iterator *iter)
3177 {
3178 ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3179 &iter->lost_events);
3180 }
3181
3182 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3183 {
3184 struct trace_iterator *iter = m->private;
3185 int i = (int)*pos;
3186 void *ent;
3187
3188 WARN_ON_ONCE(iter->leftover);
3189
3190 (*pos)++;
3191
3192 /* can't go backwards */
3193 if (iter->idx > i)
3194 return NULL;
3195
3196 if (iter->idx < 0)
3197 ent = trace_find_next_entry_inc(iter);
3198 else
3199 ent = iter;
3200
3201 while (ent && iter->idx < i)
3202 ent = trace_find_next_entry_inc(iter);
3203
3204 iter->pos = *pos;
3205
3206 return ent;
3207 }
3208
3209 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3210 {
3211 struct ring_buffer_event *event;
3212 struct ring_buffer_iter *buf_iter;
3213 unsigned long entries = 0;
3214 u64 ts;
3215
3216 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3217
3218 buf_iter = trace_buffer_iter(iter, cpu);
3219 if (!buf_iter)
3220 return;
3221
3222 ring_buffer_iter_reset(buf_iter);
3223
3224 /*
3225 * We could have the case with the max latency tracers
3226 * that a reset never took place on a cpu. This is evident
3227 * by the timestamp being before the start of the buffer.
3228 */
3229 while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3230 if (ts >= iter->trace_buffer->time_start)
3231 break;
3232 entries++;
3233 ring_buffer_read(buf_iter, NULL);
3234 }
3235
3236 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3237 }
3238
3239 /*
3240 * The current tracer is copied to avoid a global locking
3241 * all around.
3242 */
3243 static void *s_start(struct seq_file *m, loff_t *pos)
3244 {
3245 struct trace_iterator *iter = m->private;
3246 struct trace_array *tr = iter->tr;
3247 int cpu_file = iter->cpu_file;
3248 void *p = NULL;
3249 loff_t l = 0;
3250 int cpu;
3251
3252 /*
3253 * copy the tracer to avoid using a global lock all around.
3254 * iter->trace is a copy of current_trace, the pointer to the
3255 * name may be used instead of a strcmp(), as iter->trace->name
3256 * will point to the same string as current_trace->name.
3257 */
3258 mutex_lock(&trace_types_lock);
3259 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3260 *iter->trace = *tr->current_trace;
3261 mutex_unlock(&trace_types_lock);
3262
3263 #ifdef CONFIG_TRACER_MAX_TRACE
3264 if (iter->snapshot && iter->trace->use_max_tr)
3265 return ERR_PTR(-EBUSY);
3266 #endif
3267
3268 if (!iter->snapshot)
3269 atomic_inc(&trace_record_taskinfo_disabled);
3270
3271 if (*pos != iter->pos) {
3272 iter->ent = NULL;
3273 iter->cpu = 0;
3274 iter->idx = -1;
3275
3276 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3277 for_each_tracing_cpu(cpu)
3278 tracing_iter_reset(iter, cpu);
3279 } else
3280 tracing_iter_reset(iter, cpu_file);
3281
3282 iter->leftover = 0;
3283 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3284 ;
3285
3286 } else {
3287 /*
3288 * If we overflowed the seq_file before, then we want
3289 * to just reuse the trace_seq buffer again.
3290 */
3291 if (iter->leftover)
3292 p = iter;
3293 else {
3294 l = *pos - 1;
3295 p = s_next(m, p, &l);
3296 }
3297 }
3298
3299 trace_event_read_lock();
3300 trace_access_lock(cpu_file);
3301 return p;
3302 }
3303
3304 static void s_stop(struct seq_file *m, void *p)
3305 {
3306 struct trace_iterator *iter = m->private;
3307
3308 #ifdef CONFIG_TRACER_MAX_TRACE
3309 if (iter->snapshot && iter->trace->use_max_tr)
3310 return;
3311 #endif
3312
3313 if (!iter->snapshot)
3314 atomic_dec(&trace_record_taskinfo_disabled);
3315
3316 trace_access_unlock(iter->cpu_file);
3317 trace_event_read_unlock();
3318 }
3319
3320 static void
3321 get_total_entries(struct trace_buffer *buf,
3322 unsigned long *total, unsigned long *entries)
3323 {
3324 unsigned long count;
3325 int cpu;
3326
3327 *total = 0;
3328 *entries = 0;
3329
3330 for_each_tracing_cpu(cpu) {
3331 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3332 /*
3333 * If this buffer has skipped entries, then we hold all
3334 * entries for the trace and we need to ignore the
3335 * ones before the time stamp.
3336 */
3337 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3338 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3339 /* total is the same as the entries */
3340 *total += count;
3341 } else
3342 *total += count +
3343 ring_buffer_overrun_cpu(buf->buffer, cpu);
3344 *entries += count;
3345 }
3346 }
3347
3348 static void print_lat_help_header(struct seq_file *m)
3349 {
3350 seq_puts(m, "# _------=> CPU# \n"
3351 "# / _-----=> irqs-off \n"
3352 "# | / _----=> need-resched \n"
3353 "# || / _---=> hardirq/softirq \n"
3354 "# ||| / _--=> preempt-depth \n"
3355 "# |||| / delay \n"
3356 "# cmd pid ||||| time | caller \n"
3357 "# \\ / ||||| \\ | / \n");
3358 }
3359
3360 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3361 {
3362 unsigned long total;
3363 unsigned long entries;
3364
3365 get_total_entries(buf, &total, &entries);
3366 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
3367 entries, total, num_online_cpus());
3368 seq_puts(m, "#\n");
3369 }
3370
3371 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3372 unsigned int flags)
3373 {
3374 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3375
3376 print_event_info(buf, m);
3377
3378 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? "TGID " : "");
3379 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
3380 }
3381
3382 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3383 unsigned int flags)
3384 {
3385 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3386 const char tgid_space[] = " ";
3387 const char space[] = " ";
3388
3389 print_event_info(buf, m);
3390
3391 seq_printf(m, "# %s _-----=> irqs-off\n",
3392 tgid ? tgid_space : space);
3393 seq_printf(m, "# %s / _----=> need-resched\n",
3394 tgid ? tgid_space : space);
3395 seq_printf(m, "# %s| / _---=> hardirq/softirq\n",
3396 tgid ? tgid_space : space);
3397 seq_printf(m, "# %s|| / _--=> preempt-depth\n",
3398 tgid ? tgid_space : space);
3399 seq_printf(m, "# %s||| / delay\n",
3400 tgid ? tgid_space : space);
3401 seq_printf(m, "# TASK-PID %sCPU# |||| TIMESTAMP FUNCTION\n",
3402 tgid ? " TGID " : space);
3403 seq_printf(m, "# | | %s | |||| | |\n",
3404 tgid ? " | " : space);
3405 }
3406
3407 void
3408 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3409 {
3410 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3411 struct trace_buffer *buf = iter->trace_buffer;
3412 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3413 struct tracer *type = iter->trace;
3414 unsigned long entries;
3415 unsigned long total;
3416 const char *name = "preemption";
3417
3418 name = type->name;
3419
3420 get_total_entries(buf, &total, &entries);
3421
3422 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3423 name, UTS_RELEASE);
3424 seq_puts(m, "# -----------------------------------"
3425 "---------------------------------\n");
3426 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3427 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3428 nsecs_to_usecs(data->saved_latency),
3429 entries,
3430 total,
3431 buf->cpu,
3432 #if defined(CONFIG_PREEMPT_NONE)
3433 "server",
3434 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3435 "desktop",
3436 #elif defined(CONFIG_PREEMPT)
3437 "preempt",
3438 #else
3439 "unknown",
3440 #endif
3441 /* These are reserved for later use */
3442 0, 0, 0, 0);
3443 #ifdef CONFIG_SMP
3444 seq_printf(m, " #P:%d)\n", num_online_cpus());
3445 #else
3446 seq_puts(m, ")\n");
3447 #endif
3448 seq_puts(m, "# -----------------\n");
3449 seq_printf(m, "# | task: %.16s-%d "
3450 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3451 data->comm, data->pid,
3452 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3453 data->policy, data->rt_priority);
3454 seq_puts(m, "# -----------------\n");
3455
3456 if (data->critical_start) {
3457 seq_puts(m, "# => started at: ");
3458 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3459 trace_print_seq(m, &iter->seq);
3460 seq_puts(m, "\n# => ended at: ");
3461 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3462 trace_print_seq(m, &iter->seq);
3463 seq_puts(m, "\n#\n");
3464 }
3465
3466 seq_puts(m, "#\n");
3467 }
3468
3469 static void test_cpu_buff_start(struct trace_iterator *iter)
3470 {
3471 struct trace_seq *s = &iter->seq;
3472 struct trace_array *tr = iter->tr;
3473
3474 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3475 return;
3476
3477 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3478 return;
3479
3480 if (cpumask_available(iter->started) &&
3481 cpumask_test_cpu(iter->cpu, iter->started))
3482 return;
3483
3484 if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3485 return;
3486
3487 if (cpumask_available(iter->started))
3488 cpumask_set_cpu(iter->cpu, iter->started);
3489
3490 /* Don't print started cpu buffer for the first entry of the trace */
3491 if (iter->idx > 1)
3492 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3493 iter->cpu);
3494 }
3495
3496 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3497 {
3498 struct trace_array *tr = iter->tr;
3499 struct trace_seq *s = &iter->seq;
3500 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3501 struct trace_entry *entry;
3502 struct trace_event *event;
3503
3504 entry = iter->ent;
3505
3506 test_cpu_buff_start(iter);
3507
3508 event = ftrace_find_event(entry->type);
3509
3510 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3511 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3512 trace_print_lat_context(iter);
3513 else
3514 trace_print_context(iter);
3515 }
3516
3517 if (trace_seq_has_overflowed(s))
3518 return TRACE_TYPE_PARTIAL_LINE;
3519
3520 if (event)
3521 return event->funcs->trace(iter, sym_flags, event);
3522
3523 trace_seq_printf(s, "Unknown type %d\n", entry->type);
3524
3525 return trace_handle_return(s);
3526 }
3527
3528 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3529 {
3530 struct trace_array *tr = iter->tr;
3531 struct trace_seq *s = &iter->seq;
3532 struct trace_entry *entry;
3533 struct trace_event *event;
3534
3535 entry = iter->ent;
3536
3537 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3538 trace_seq_printf(s, "%d %d %llu ",
3539 entry->pid, iter->cpu, iter->ts);
3540
3541 if (trace_seq_has_overflowed(s))
3542 return TRACE_TYPE_PARTIAL_LINE;
3543
3544 event = ftrace_find_event(entry->type);
3545 if (event)
3546 return event->funcs->raw(iter, 0, event);
3547
3548 trace_seq_printf(s, "%d ?\n", entry->type);
3549
3550 return trace_handle_return(s);
3551 }
3552
3553 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3554 {
3555 struct trace_array *tr = iter->tr;
3556 struct trace_seq *s = &iter->seq;
3557 unsigned char newline = '\n';
3558 struct trace_entry *entry;
3559 struct trace_event *event;
3560
3561 entry = iter->ent;
3562
3563 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3564 SEQ_PUT_HEX_FIELD(s, entry->pid);
3565 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3566 SEQ_PUT_HEX_FIELD(s, iter->ts);
3567 if (trace_seq_has_overflowed(s))
3568 return TRACE_TYPE_PARTIAL_LINE;
3569 }
3570
3571 event = ftrace_find_event(entry->type);
3572 if (event) {
3573 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3574 if (ret != TRACE_TYPE_HANDLED)
3575 return ret;
3576 }
3577
3578 SEQ_PUT_FIELD(s, newline);
3579
3580 return trace_handle_return(s);
3581 }
3582
3583 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3584 {
3585 struct trace_array *tr = iter->tr;
3586 struct trace_seq *s = &iter->seq;
3587 struct trace_entry *entry;
3588 struct trace_event *event;
3589
3590 entry = iter->ent;
3591
3592 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3593 SEQ_PUT_FIELD(s, entry->pid);
3594 SEQ_PUT_FIELD(s, iter->cpu);
3595 SEQ_PUT_FIELD(s, iter->ts);
3596 if (trace_seq_has_overflowed(s))
3597 return TRACE_TYPE_PARTIAL_LINE;
3598 }
3599
3600 event = ftrace_find_event(entry->type);
3601 return event ? event->funcs->binary(iter, 0, event) :
3602 TRACE_TYPE_HANDLED;
3603 }
3604
3605 int trace_empty(struct trace_iterator *iter)
3606 {
3607 struct ring_buffer_iter *buf_iter;
3608 int cpu;
3609
3610 /* If we are looking at one CPU buffer, only check that one */
3611 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3612 cpu = iter->cpu_file;
3613 buf_iter = trace_buffer_iter(iter, cpu);
3614 if (buf_iter) {
3615 if (!ring_buffer_iter_empty(buf_iter))
3616 return 0;
3617 } else {
3618 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3619 return 0;
3620 }
3621 return 1;
3622 }
3623
3624 for_each_tracing_cpu(cpu) {
3625 buf_iter = trace_buffer_iter(iter, cpu);
3626 if (buf_iter) {
3627 if (!ring_buffer_iter_empty(buf_iter))
3628 return 0;
3629 } else {
3630 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3631 return 0;
3632 }
3633 }
3634
3635 return 1;
3636 }
3637
3638 /* Called with trace_event_read_lock() held. */
3639 enum print_line_t print_trace_line(struct trace_iterator *iter)
3640 {
3641 struct trace_array *tr = iter->tr;
3642 unsigned long trace_flags = tr->trace_flags;
3643 enum print_line_t ret;
3644
3645 if (iter->lost_events) {
3646 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3647 iter->cpu, iter->lost_events);
3648 if (trace_seq_has_overflowed(&iter->seq))
3649 return TRACE_TYPE_PARTIAL_LINE;
3650 }
3651
3652 if (iter->trace && iter->trace->print_line) {
3653 ret = iter->trace->print_line(iter);
3654 if (ret != TRACE_TYPE_UNHANDLED)
3655 return ret;
3656 }
3657
3658 if (iter->ent->type == TRACE_BPUTS &&
3659 trace_flags & TRACE_ITER_PRINTK &&
3660 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3661 return trace_print_bputs_msg_only(iter);
3662
3663 if (iter->ent->type == TRACE_BPRINT &&
3664 trace_flags & TRACE_ITER_PRINTK &&
3665 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3666 return trace_print_bprintk_msg_only(iter);
3667
3668 if (iter->ent->type == TRACE_PRINT &&
3669 trace_flags & TRACE_ITER_PRINTK &&
3670 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3671 return trace_print_printk_msg_only(iter);
3672
3673 if (trace_flags & TRACE_ITER_BIN)
3674 return print_bin_fmt(iter);
3675
3676 if (trace_flags & TRACE_ITER_HEX)
3677 return print_hex_fmt(iter);
3678
3679 if (trace_flags & TRACE_ITER_RAW)
3680 return print_raw_fmt(iter);
3681
3682 return print_trace_fmt(iter);
3683 }
3684
3685 void trace_latency_header(struct seq_file *m)
3686 {
3687 struct trace_iterator *iter = m->private;
3688 struct trace_array *tr = iter->tr;
3689
3690 /* print nothing if the buffers are empty */
3691 if (trace_empty(iter))
3692 return;
3693
3694 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3695 print_trace_header(m, iter);
3696
3697 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3698 print_lat_help_header(m);
3699 }
3700
3701 void trace_default_header(struct seq_file *m)
3702 {
3703 struct trace_iterator *iter = m->private;
3704 struct trace_array *tr = iter->tr;
3705 unsigned long trace_flags = tr->trace_flags;
3706
3707 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3708 return;
3709
3710 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3711 /* print nothing if the buffers are empty */
3712 if (trace_empty(iter))
3713 return;
3714 print_trace_header(m, iter);
3715 if (!(trace_flags & TRACE_ITER_VERBOSE))
3716 print_lat_help_header(m);
3717 } else {
3718 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3719 if (trace_flags & TRACE_ITER_IRQ_INFO)
3720 print_func_help_header_irq(iter->trace_buffer,
3721 m, trace_flags);
3722 else
3723 print_func_help_header(iter->trace_buffer, m,
3724 trace_flags);
3725 }
3726 }
3727 }
3728
3729 static void test_ftrace_alive(struct seq_file *m)
3730 {
3731 if (!ftrace_is_dead())
3732 return;
3733 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3734 "# MAY BE MISSING FUNCTION EVENTS\n");
3735 }
3736
3737 #ifdef CONFIG_TRACER_MAX_TRACE
3738 static void show_snapshot_main_help(struct seq_file *m)
3739 {
3740 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3741 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3742 "# Takes a snapshot of the main buffer.\n"
3743 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3744 "# (Doesn't have to be '2' works with any number that\n"
3745 "# is not a '0' or '1')\n");
3746 }
3747
3748 static void show_snapshot_percpu_help(struct seq_file *m)
3749 {
3750 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3751 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3752 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3753 "# Takes a snapshot of the main buffer for this cpu.\n");
3754 #else
3755 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3756 "# Must use main snapshot file to allocate.\n");
3757 #endif
3758 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3759 "# (Doesn't have to be '2' works with any number that\n"
3760 "# is not a '0' or '1')\n");
3761 }
3762
3763 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3764 {
3765 if (iter->tr->allocated_snapshot)
3766 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3767 else
3768 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3769
3770 seq_puts(m, "# Snapshot commands:\n");
3771 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3772 show_snapshot_main_help(m);
3773 else
3774 show_snapshot_percpu_help(m);
3775 }
3776 #else
3777 /* Should never be called */
3778 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3779 #endif
3780
3781 static int s_show(struct seq_file *m, void *v)
3782 {
3783 struct trace_iterator *iter = v;
3784 int ret;
3785
3786 if (iter->ent == NULL) {
3787 if (iter->tr) {
3788 seq_printf(m, "# tracer: %s\n", iter->trace->name);
3789 seq_puts(m, "#\n");
3790 test_ftrace_alive(m);
3791 }
3792 if (iter->snapshot && trace_empty(iter))
3793 print_snapshot_help(m, iter);
3794 else if (iter->trace && iter->trace->print_header)
3795 iter->trace->print_header(m);
3796 else
3797 trace_default_header(m);
3798
3799 } else if (iter->leftover) {
3800 /*
3801 * If we filled the seq_file buffer earlier, we
3802 * want to just show it now.
3803 */
3804 ret = trace_print_seq(m, &iter->seq);
3805
3806 /* ret should this time be zero, but you never know */
3807 iter->leftover = ret;
3808
3809 } else {
3810 print_trace_line(iter);
3811 ret = trace_print_seq(m, &iter->seq);
3812 /*
3813 * If we overflow the seq_file buffer, then it will
3814 * ask us for this data again at start up.
3815 * Use that instead.
3816 * ret is 0 if seq_file write succeeded.
3817 * -1 otherwise.
3818 */
3819 iter->leftover = ret;
3820 }
3821
3822 return 0;
3823 }
3824
3825 /*
3826 * Should be used after trace_array_get(); trace_types_lock
3827 * ensures that i_cdev was already initialized.
3828 */
3829 static inline int tracing_get_cpu(struct inode *inode)
3830 {
3831 if (inode->i_cdev) /* See trace_create_cpu_file() */
3832 return (long)inode->i_cdev - 1;
3833 return RING_BUFFER_ALL_CPUS;
3834 }
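/*
 * A minimal sketch of the encoding the decode above relies on, assuming
 * (as the "See trace_create_cpu_file()" note suggests) that the per-cpu
 * files stash "cpu + 1" in i_cdev when they are created, so a NULL i_cdev
 * can mean "no specific CPU":
 *
 *	inode->i_cdev = (void *)(long)(cpu + 1);	// at file creation
 *	cpu = (long)inode->i_cdev - 1;			// here, the inverse
 */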
3835
3836 static const struct seq_operations tracer_seq_ops = {
3837 .start = s_start,
3838 .next = s_next,
3839 .stop = s_stop,
3840 .show = s_show,
3841 };
3842
3843 static struct trace_iterator *
3844 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3845 {
3846 struct trace_array *tr = inode->i_private;
3847 struct trace_iterator *iter;
3848 int cpu;
3849
3850 if (tracing_disabled)
3851 return ERR_PTR(-ENODEV);
3852
3853 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3854 if (!iter)
3855 return ERR_PTR(-ENOMEM);
3856
3857 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3858 GFP_KERNEL);
3859 if (!iter->buffer_iter)
3860 goto release;
3861
3862 /*
3863 * We make a copy of the current tracer to avoid concurrent
3864 * changes on it while we are reading.
3865 */
3866 mutex_lock(&trace_types_lock);
3867 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3868 if (!iter->trace)
3869 goto fail;
3870
3871 *iter->trace = *tr->current_trace;
3872
3873 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3874 goto fail;
3875
3876 iter->tr = tr;
3877
3878 #ifdef CONFIG_TRACER_MAX_TRACE
3879 /* Currently only the top directory has a snapshot */
3880 if (tr->current_trace->print_max || snapshot)
3881 iter->trace_buffer = &tr->max_buffer;
3882 else
3883 #endif
3884 iter->trace_buffer = &tr->trace_buffer;
3885 iter->snapshot = snapshot;
3886 iter->pos = -1;
3887 iter->cpu_file = tracing_get_cpu(inode);
3888 mutex_init(&iter->mutex);
3889
3890 /* Notify the tracer early; before we stop tracing. */
3891 if (iter->trace && iter->trace->open)
3892 iter->trace->open(iter);
3893
3894 /* Annotate start of buffers if we had overruns */
3895 if (ring_buffer_overruns(iter->trace_buffer->buffer))
3896 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3897
3898 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3899 if (trace_clocks[tr->clock_id].in_ns)
3900 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3901
3902 /* stop the trace while dumping if we are not opening "snapshot" */
3903 if (!iter->snapshot)
3904 tracing_stop_tr(tr);
3905
3906 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3907 for_each_tracing_cpu(cpu) {
3908 iter->buffer_iter[cpu] =
3909 ring_buffer_read_prepare(iter->trace_buffer->buffer,
3910 cpu, GFP_KERNEL);
3911 }
3912 ring_buffer_read_prepare_sync();
3913 for_each_tracing_cpu(cpu) {
3914 ring_buffer_read_start(iter->buffer_iter[cpu]);
3915 tracing_iter_reset(iter, cpu);
3916 }
3917 } else {
3918 cpu = iter->cpu_file;
3919 iter->buffer_iter[cpu] =
3920 ring_buffer_read_prepare(iter->trace_buffer->buffer,
3921 cpu, GFP_KERNEL);
3922 ring_buffer_read_prepare_sync();
3923 ring_buffer_read_start(iter->buffer_iter[cpu]);
3924 tracing_iter_reset(iter, cpu);
3925 }
3926
3927 mutex_unlock(&trace_types_lock);
3928
3929 return iter;
3930
3931 fail:
3932 mutex_unlock(&trace_types_lock);
3933 kfree(iter->trace);
3934 kfree(iter->buffer_iter);
3935 release:
3936 seq_release_private(inode, file);
3937 return ERR_PTR(-ENOMEM);
3938 }
3939
3940 int tracing_open_generic(struct inode *inode, struct file *filp)
3941 {
3942 if (tracing_disabled)
3943 return -ENODEV;
3944
3945 filp->private_data = inode->i_private;
3946 return 0;
3947 }
3948
3949 bool tracing_is_disabled(void)
3950 {
3951 return (tracing_disabled) ? true : false;
3952 }
3953
3954 /*
3955 * Open and update trace_array ref count.
3956 * Must have the current trace_array passed to it.
3957 */
3958 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3959 {
3960 struct trace_array *tr = inode->i_private;
3961
3962 if (tracing_disabled)
3963 return -ENODEV;
3964
3965 if (trace_array_get(tr) < 0)
3966 return -ENODEV;
3967
3968 filp->private_data = inode->i_private;
3969
3970 return 0;
3971 }
3972
3973 static int tracing_release(struct inode *inode, struct file *file)
3974 {
3975 struct trace_array *tr = inode->i_private;
3976 struct seq_file *m = file->private_data;
3977 struct trace_iterator *iter;
3978 int cpu;
3979
3980 if (!(file->f_mode & FMODE_READ)) {
3981 trace_array_put(tr);
3982 return 0;
3983 }
3984
3985 /* Writes do not use seq_file */
3986 iter = m->private;
3987 mutex_lock(&trace_types_lock);
3988
3989 for_each_tracing_cpu(cpu) {
3990 if (iter->buffer_iter[cpu])
3991 ring_buffer_read_finish(iter->buffer_iter[cpu]);
3992 }
3993
3994 if (iter->trace && iter->trace->close)
3995 iter->trace->close(iter);
3996
3997 if (!iter->snapshot)
3998 /* reenable tracing if it was previously enabled */
3999 tracing_start_tr(tr);
4000
4001 __trace_array_put(tr);
4002
4003 mutex_unlock(&trace_types_lock);
4004
4005 mutex_destroy(&iter->mutex);
4006 free_cpumask_var(iter->started);
4007 kfree(iter->trace);
4008 kfree(iter->buffer_iter);
4009 seq_release_private(inode, file);
4010
4011 return 0;
4012 }
4013
4014 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4015 {
4016 struct trace_array *tr = inode->i_private;
4017
4018 trace_array_put(tr);
4019 return 0;
4020 }
4021
4022 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4023 {
4024 struct trace_array *tr = inode->i_private;
4025
4026 trace_array_put(tr);
4027
4028 return single_release(inode, file);
4029 }
4030
4031 static int tracing_open(struct inode *inode, struct file *file)
4032 {
4033 struct trace_array *tr = inode->i_private;
4034 struct trace_iterator *iter;
4035 int ret = 0;
4036
4037 if (trace_array_get(tr) < 0)
4038 return -ENODEV;
4039
4040 /* If this file was open for write, then erase contents */
4041 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4042 int cpu = tracing_get_cpu(inode);
4043 struct trace_buffer *trace_buf = &tr->trace_buffer;
4044
4045 #ifdef CONFIG_TRACER_MAX_TRACE
4046 if (tr->current_trace->print_max)
4047 trace_buf = &tr->max_buffer;
4048 #endif
4049
4050 if (cpu == RING_BUFFER_ALL_CPUS)
4051 tracing_reset_online_cpus(trace_buf);
4052 else
4053 tracing_reset(trace_buf, cpu);
4054 }
4055
4056 if (file->f_mode & FMODE_READ) {
4057 iter = __tracing_open(inode, file, false);
4058 if (IS_ERR(iter))
4059 ret = PTR_ERR(iter);
4060 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4061 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4062 }
4063
4064 if (ret < 0)
4065 trace_array_put(tr);
4066
4067 return ret;
4068 }
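/*
 * Example of the truncate-on-open path handled above (assuming tracefs is
 * mounted at /sys/kernel/tracing; a shell '>' redirection opens the file
 * with O_TRUNC):
 *
 *	# echo > trace			# clears every CPU's buffer
 *	# echo > per_cpu/cpu0/trace	# clears only CPU 0's buffer
 */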
4069
4070 /*
4071 * Some tracers are not suitable for instance buffers.
4072 * A tracer is always available for the global (top level) array,
4073 * and is available for an instance only if it explicitly allows it.
4074 */
4075 static bool
4076 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4077 {
4078 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4079 }
4080
4081 /* Find the next tracer that this trace array may use */
4082 static struct tracer *
4083 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4084 {
4085 while (t && !trace_ok_for_array(t, tr))
4086 t = t->next;
4087
4088 return t;
4089 }
4090
4091 static void *
4092 t_next(struct seq_file *m, void *v, loff_t *pos)
4093 {
4094 struct trace_array *tr = m->private;
4095 struct tracer *t = v;
4096
4097 (*pos)++;
4098
4099 if (t)
4100 t = get_tracer_for_array(tr, t->next);
4101
4102 return t;
4103 }
4104
4105 static void *t_start(struct seq_file *m, loff_t *pos)
4106 {
4107 struct trace_array *tr = m->private;
4108 struct tracer *t;
4109 loff_t l = 0;
4110
4111 mutex_lock(&trace_types_lock);
4112
4113 t = get_tracer_for_array(tr, trace_types);
4114 for (; t && l < *pos; t = t_next(m, t, &l))
4115 ;
4116
4117 return t;
4118 }
4119
4120 static void t_stop(struct seq_file *m, void *p)
4121 {
4122 mutex_unlock(&trace_types_lock);
4123 }
4124
4125 static int t_show(struct seq_file *m, void *v)
4126 {
4127 struct tracer *t = v;
4128
4129 if (!t)
4130 return 0;
4131
4132 seq_puts(m, t->name);
4133 if (t->next)
4134 seq_putc(m, ' ');
4135 else
4136 seq_putc(m, '\n');
4137
4138 return 0;
4139 }
4140
4141 static const struct seq_operations show_traces_seq_ops = {
4142 .start = t_start,
4143 .next = t_next,
4144 .stop = t_stop,
4145 .show = t_show,
4146 };
4147
4148 static int show_traces_open(struct inode *inode, struct file *file)
4149 {
4150 struct trace_array *tr = inode->i_private;
4151 struct seq_file *m;
4152 int ret;
4153
4154 if (tracing_disabled)
4155 return -ENODEV;
4156
4157 if (trace_array_get(tr) < 0)
4158 return -ENODEV;
4159
4160 ret = seq_open(file, &show_traces_seq_ops);
4161 if (ret) {
4162 trace_array_put(tr);
4163 return ret;
4164 }
4165
4166 m = file->private_data;
4167 m->private = tr;
4168
4169 return 0;
4170 }
4171
4172 static int show_traces_release(struct inode *inode, struct file *file)
4173 {
4174 struct trace_array *tr = inode->i_private;
4175
4176 trace_array_put(tr);
4177 return seq_release(inode, file);
4178 }
4179
4180 static ssize_t
4181 tracing_write_stub(struct file *filp, const char __user *ubuf,
4182 size_t count, loff_t *ppos)
4183 {
4184 return count;
4185 }
4186
4187 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4188 {
4189 int ret;
4190
4191 if (file->f_mode & FMODE_READ)
4192 ret = seq_lseek(file, offset, whence);
4193 else
4194 file->f_pos = ret = 0;
4195
4196 return ret;
4197 }
4198
4199 static const struct file_operations tracing_fops = {
4200 .open = tracing_open,
4201 .read = seq_read,
4202 .write = tracing_write_stub,
4203 .llseek = tracing_lseek,
4204 .release = tracing_release,
4205 };
4206
4207 static const struct file_operations show_traces_fops = {
4208 .open = show_traces_open,
4209 .read = seq_read,
4210 .llseek = seq_lseek,
4211 .release = show_traces_release,
4212 };
4213
4214 static ssize_t
4215 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4216 size_t count, loff_t *ppos)
4217 {
4218 struct trace_array *tr = file_inode(filp)->i_private;
4219 char *mask_str;
4220 int len;
4221
4222 len = snprintf(NULL, 0, "%*pb\n",
4223 cpumask_pr_args(tr->tracing_cpumask)) + 1;
4224 mask_str = kmalloc(len, GFP_KERNEL);
4225 if (!mask_str)
4226 return -ENOMEM;
4227
4228 len = snprintf(mask_str, len, "%*pb\n",
4229 cpumask_pr_args(tr->tracing_cpumask));
4230 if (len >= count) {
4231 count = -EINVAL;
4232 goto out_err;
4233 }
4234 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4235
4236 out_err:
4237 kfree(mask_str);
4238
4239 return count;
4240 }
4241
4242 static ssize_t
4243 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4244 size_t count, loff_t *ppos)
4245 {
4246 struct trace_array *tr = file_inode(filp)->i_private;
4247 cpumask_var_t tracing_cpumask_new;
4248 int err, cpu;
4249
4250 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4251 return -ENOMEM;
4252
4253 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4254 if (err)
4255 goto err_unlock;
4256
4257 local_irq_disable();
4258 arch_spin_lock(&tr->max_lock);
4259 for_each_tracing_cpu(cpu) {
4260 /*
4261 * Increase/decrease the disabled counter if we are
4262 * about to flip a bit in the cpumask:
4263 */
4264 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4265 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4266 atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4267 ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4268 }
4269 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4270 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4271 atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4272 ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4273 }
4274 }
4275 arch_spin_unlock(&tr->max_lock);
4276 local_irq_enable();
4277
4278 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4279 free_cpumask_var(tracing_cpumask_new);
4280
4281 return count;
4282
4283 err_unlock:
4284 free_cpumask_var(tracing_cpumask_new);
4285
4286 return err;
4287 }
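/*
 * Usage sketch for the cpumask file backed by the handlers above (assuming
 * tracefs is mounted at /sys/kernel/tracing; the value is a hex cpumask):
 *
 *	# cat tracing_cpumask		# e.g. "f" on a 4-CPU machine
 *	# echo 3 > tracing_cpumask	# trace only CPUs 0 and 1
 */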
4288
4289 static const struct file_operations tracing_cpumask_fops = {
4290 .open = tracing_open_generic_tr,
4291 .read = tracing_cpumask_read,
4292 .write = tracing_cpumask_write,
4293 .release = tracing_release_generic_tr,
4294 .llseek = generic_file_llseek,
4295 };
4296
4297 static int tracing_trace_options_show(struct seq_file *m, void *v)
4298 {
4299 struct tracer_opt *trace_opts;
4300 struct trace_array *tr = m->private;
4301 u32 tracer_flags;
4302 int i;
4303
4304 mutex_lock(&trace_types_lock);
4305 tracer_flags = tr->current_trace->flags->val;
4306 trace_opts = tr->current_trace->flags->opts;
4307
4308 for (i = 0; trace_options[i]; i++) {
4309 if (tr->trace_flags & (1 << i))
4310 seq_printf(m, "%s\n", trace_options[i]);
4311 else
4312 seq_printf(m, "no%s\n", trace_options[i]);
4313 }
4314
4315 for (i = 0; trace_opts[i].name; i++) {
4316 if (tracer_flags & trace_opts[i].bit)
4317 seq_printf(m, "%s\n", trace_opts[i].name);
4318 else
4319 seq_printf(m, "no%s\n", trace_opts[i].name);
4320 }
4321 mutex_unlock(&trace_types_lock);
4322
4323 return 0;
4324 }
4325
4326 static int __set_tracer_option(struct trace_array *tr,
4327 struct tracer_flags *tracer_flags,
4328 struct tracer_opt *opts, int neg)
4329 {
4330 struct tracer *trace = tracer_flags->trace;
4331 int ret;
4332
4333 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4334 if (ret)
4335 return ret;
4336
4337 if (neg)
4338 tracer_flags->val &= ~opts->bit;
4339 else
4340 tracer_flags->val |= opts->bit;
4341 return 0;
4342 }
4343
4344 /* Try to assign a tracer specific option */
4345 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4346 {
4347 struct tracer *trace = tr->current_trace;
4348 struct tracer_flags *tracer_flags = trace->flags;
4349 struct tracer_opt *opts = NULL;
4350 int i;
4351
4352 for (i = 0; tracer_flags->opts[i].name; i++) {
4353 opts = &tracer_flags->opts[i];
4354
4355 if (strcmp(cmp, opts->name) == 0)
4356 return __set_tracer_option(tr, trace->flags, opts, neg);
4357 }
4358
4359 return -EINVAL;
4360 }
4361
4362 /* Some tracers require overwrite to stay enabled */
4363 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4364 {
4365 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4366 return -1;
4367
4368 return 0;
4369 }
4370
4371 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4372 {
4373 if ((mask == TRACE_ITER_RECORD_TGID) ||
4374 (mask == TRACE_ITER_RECORD_CMD))
4375 lockdep_assert_held(&event_mutex);
4376
4377 /* do nothing if flag is already set */
4378 if (!!(tr->trace_flags & mask) == !!enabled)
4379 return 0;
4380
4381 /* Give the tracer a chance to approve the change */
4382 if (tr->current_trace->flag_changed)
4383 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4384 return -EINVAL;
4385
4386 if (enabled)
4387 tr->trace_flags |= mask;
4388 else
4389 tr->trace_flags &= ~mask;
4390
4391 if (mask == TRACE_ITER_RECORD_CMD)
4392 trace_event_enable_cmd_record(enabled);
4393
4394 if (mask == TRACE_ITER_RECORD_TGID) {
4395 if (!tgid_map)
4396 tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
4397 GFP_KERNEL);
4398 if (!tgid_map) {
4399 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4400 return -ENOMEM;
4401 }
4402
4403 trace_event_enable_tgid_record(enabled);
4404 }
4405
4406 if (mask == TRACE_ITER_EVENT_FORK)
4407 trace_event_follow_fork(tr, enabled);
4408
4409 if (mask == TRACE_ITER_FUNC_FORK)
4410 ftrace_pid_follow_fork(tr, enabled);
4411
4412 if (mask == TRACE_ITER_OVERWRITE) {
4413 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4414 #ifdef CONFIG_TRACER_MAX_TRACE
4415 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4416 #endif
4417 }
4418
4419 if (mask == TRACE_ITER_PRINTK) {
4420 trace_printk_start_stop_comm(enabled);
4421 trace_printk_control(enabled);
4422 }
4423
4424 return 0;
4425 }
4426
4427 static int trace_set_options(struct trace_array *tr, char *option)
4428 {
4429 char *cmp;
4430 int neg = 0;
4431 int ret = -ENODEV;
4432 int i;
4433 size_t orig_len = strlen(option);
4434
4435 cmp = strstrip(option);
4436
4437 if (strncmp(cmp, "no", 2) == 0) {
4438 neg = 1;
4439 cmp += 2;
4440 }
4441
4442 mutex_lock(&event_mutex);
4443 mutex_lock(&trace_types_lock);
4444
4445 for (i = 0; trace_options[i]; i++) {
4446 if (strcmp(cmp, trace_options[i]) == 0) {
4447 ret = set_tracer_flag(tr, 1 << i, !neg);
4448 break;
4449 }
4450 }
4451
4452 /* If no option could be set, test the specific tracer options */
4453 if (!trace_options[i])
4454 ret = set_tracer_option(tr, cmp, neg);
4455
4456 mutex_unlock(&trace_types_lock);
4457 mutex_unlock(&event_mutex);
4458
4459 /*
4460 * If the first trailing whitespace is replaced with '\0' by strstrip,
4461 * turn it back into a space.
4462 */
4463 if (orig_len > strlen(option))
4464 option[strlen(option)] = ' ';
4465
4466 return ret;
4467 }
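/*
 * Example of the option strings parsed above (assuming tracefs is mounted
 * at /sys/kernel/tracing). A leading "no" clears a flag; anything not in
 * trace_options[] is tried as a tracer-specific option:
 *
 *	# echo noprint-parent > trace_options
 *	# echo sym-offset > trace_options
 */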
4468
4469 static void __init apply_trace_boot_options(void)
4470 {
4471 char *buf = trace_boot_options_buf;
4472 char *option;
4473
4474 while (true) {
4475 option = strsep(&buf, ",");
4476
4477 if (!option)
4478 break;
4479
4480 if (*option)
4481 trace_set_options(&global_trace, option);
4482
4483 /* Put back the comma to allow this to be called again */
4484 if (buf)
4485 *(buf - 1) = ',';
4486 }
4487 }
4488
4489 static ssize_t
4490 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4491 size_t cnt, loff_t *ppos)
4492 {
4493 struct seq_file *m = filp->private_data;
4494 struct trace_array *tr = m->private;
4495 char buf[64];
4496 int ret;
4497
4498 if (cnt >= sizeof(buf))
4499 return -EINVAL;
4500
4501 if (copy_from_user(buf, ubuf, cnt))
4502 return -EFAULT;
4503
4504 buf[cnt] = 0;
4505
4506 ret = trace_set_options(tr, buf);
4507 if (ret < 0)
4508 return ret;
4509
4510 *ppos += cnt;
4511
4512 return cnt;
4513 }
4514
4515 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4516 {
4517 struct trace_array *tr = inode->i_private;
4518 int ret;
4519
4520 if (tracing_disabled)
4521 return -ENODEV;
4522
4523 if (trace_array_get(tr) < 0)
4524 return -ENODEV;
4525
4526 ret = single_open(file, tracing_trace_options_show, inode->i_private);
4527 if (ret < 0)
4528 trace_array_put(tr);
4529
4530 return ret;
4531 }
4532
4533 static const struct file_operations tracing_iter_fops = {
4534 .open = tracing_trace_options_open,
4535 .read = seq_read,
4536 .llseek = seq_lseek,
4537 .release = tracing_single_release_tr,
4538 .write = tracing_trace_options_write,
4539 };
4540
4541 static const char readme_msg[] =
4542 "tracing mini-HOWTO:\n\n"
4543 "# echo 0 > tracing_on : quick way to disable tracing\n"
4544 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4545 " Important files:\n"
4546 " trace\t\t\t- The static contents of the buffer\n"
4547 "\t\t\t To clear the buffer write into this file: echo > trace\n"
4548 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4549 " current_tracer\t- function and latency tracers\n"
4550 " available_tracers\t- list of configured tracers for current_tracer\n"
4551 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
4552 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
4553 " trace_clock\t\t-change the clock used to order events\n"
4554 " local: Per cpu clock but may not be synced across CPUs\n"
4555 " global: Synced across CPUs but slows tracing down.\n"
4556 " counter: Not a clock, but just an increment\n"
4557 " uptime: Jiffy counter from time of boot\n"
4558 " perf: Same clock that perf events use\n"
4559 #ifdef CONFIG_X86_64
4560 " x86-tsc: TSC cycle counter\n"
4561 #endif
4562 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4563 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4564 " tracing_cpumask\t- Limit which CPUs to trace\n"
4565 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4566 "\t\t\t Remove sub-buffer with rmdir\n"
4567 " trace_options\t\t- Set format or modify how tracing happens\n"
4568 "\t\t\t Disable an option by adding a suffix 'no' to the\n"
4569 "\t\t\t option name\n"
4570 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4571 #ifdef CONFIG_DYNAMIC_FTRACE
4572 "\n available_filter_functions - list of functions that can be filtered on\n"
4573 " set_ftrace_filter\t- echo function name in here to only trace these\n"
4574 "\t\t\t functions\n"
4575 "\t accepts: func_full_name or glob-matching-pattern\n"
4576 "\t modules: Can select a group via module\n"
4577 "\t Format: :mod:<module-name>\n"
4578 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
4579 "\t triggers: a command to perform when function is hit\n"
4580 "\t Format: <function>:<trigger>[:count]\n"
4581 "\t trigger: traceon, traceoff\n"
4582 "\t\t enable_event:<system>:<event>\n"
4583 "\t\t disable_event:<system>:<event>\n"
4584 #ifdef CONFIG_STACKTRACE
4585 "\t\t stacktrace\n"
4586 #endif
4587 #ifdef CONFIG_TRACER_SNAPSHOT
4588 "\t\t snapshot\n"
4589 #endif
4590 "\t\t dump\n"
4591 "\t\t cpudump\n"
4592 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
4593 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
4594 "\t The first one will disable tracing every time do_fault is hit\n"
4595 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
4596 "\t The first time do trap is hit and it disables tracing, the\n"
4597 "\t counter will decrement to 2. If tracing is already disabled,\n"
4598 "\t the counter will not decrement. It only decrements when the\n"
4599 "\t trigger did work\n"
4600 "\t To remove trigger without count:\n"
4601 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
4602 "\t To remove trigger with a count:\n"
4603 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4604 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
4605 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4606 "\t modules: Can select a group via module command :mod:\n"
4607 "\t Does not accept triggers\n"
4608 #endif /* CONFIG_DYNAMIC_FTRACE */
4609 #ifdef CONFIG_FUNCTION_TRACER
4610 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4611 "\t\t (function)\n"
4612 #endif
4613 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4614 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4615 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4616 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4617 #endif
4618 #ifdef CONFIG_TRACER_SNAPSHOT
4619 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
4620 "\t\t\t snapshot buffer. Read the contents for more\n"
4621 "\t\t\t information\n"
4622 #endif
4623 #ifdef CONFIG_STACK_TRACER
4624 " stack_trace\t\t- Shows the max stack trace when active\n"
4625 " stack_max_size\t- Shows current max stack size that was traced\n"
4626 "\t\t\t Write into this file to reset the max size (trigger a\n"
4627 "\t\t\t new trace)\n"
4628 #ifdef CONFIG_DYNAMIC_FTRACE
4629 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4630 "\t\t\t traces\n"
4631 #endif
4632 #endif /* CONFIG_STACK_TRACER */
4633 #ifdef CONFIG_KPROBE_EVENTS
4634 " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4635 "\t\t\t Write into this file to define/undefine new trace events.\n"
4636 #endif
4637 #ifdef CONFIG_UPROBE_EVENTS
4638 " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4639 "\t\t\t Write into this file to define/undefine new trace events.\n"
4640 #endif
4641 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4642 "\t accepts: event-definitions (one definition per line)\n"
4643 "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
4644 "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4645 "\t -:[<group>/]<event>\n"
4646 #ifdef CONFIG_KPROBE_EVENTS
4647 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4648 "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4649 #endif
4650 #ifdef CONFIG_UPROBE_EVENTS
4651 "\t place: <path>:<offset>\n"
4652 #endif
4653 "\t args: <name>=fetcharg[:type]\n"
4654 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4655 "\t $stack<index>, $stack, $retval, $comm\n"
4656 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4657 "\t b<bit-width>@<bit-offset>/<container-size>\n"
4658 #endif
4659 " events/\t\t- Directory containing all trace event subsystems:\n"
4660 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4661 " events/<system>/\t- Directory containing all trace events for <system>:\n"
4662 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4663 "\t\t\t events\n"
4664 " filter\t\t- If set, only events passing filter are traced\n"
4665 " events/<system>/<event>/\t- Directory containing control files for\n"
4666 "\t\t\t <event>:\n"
4667 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4668 " filter\t\t- If set, only events passing filter are traced\n"
4669 " trigger\t\t- If set, a command to perform when event is hit\n"
4670 "\t Format: <trigger>[:count][if <filter>]\n"
4671 "\t trigger: traceon, traceoff\n"
4672 "\t enable_event:<system>:<event>\n"
4673 "\t disable_event:<system>:<event>\n"
4674 #ifdef CONFIG_HIST_TRIGGERS
4675 "\t enable_hist:<system>:<event>\n"
4676 "\t disable_hist:<system>:<event>\n"
4677 #endif
4678 #ifdef CONFIG_STACKTRACE
4679 "\t\t stacktrace\n"
4680 #endif
4681 #ifdef CONFIG_TRACER_SNAPSHOT
4682 "\t\t snapshot\n"
4683 #endif
4684 #ifdef CONFIG_HIST_TRIGGERS
4685 "\t\t hist (see below)\n"
4686 #endif
4687 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
4688 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
4689 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4690 "\t events/block/block_unplug/trigger\n"
4691 "\t The first disables tracing every time block_unplug is hit.\n"
4692 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
4693 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
4694 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4695 "\t Like function triggers, the counter is only decremented if it\n"
4696 "\t enabled or disabled tracing.\n"
4697 "\t To remove a trigger without a count:\n"
4698 "\t echo '!<trigger> > <system>/<event>/trigger\n"
4699 "\t To remove a trigger with a count:\n"
4700 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
4701 "\t Filters can be ignored when removing a trigger.\n"
4702 #ifdef CONFIG_HIST_TRIGGERS
4703 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
4704 "\t Format: hist:keys=<field1[,field2,...]>\n"
4705 "\t [:values=<field1[,field2,...]>]\n"
4706 "\t [:sort=<field1[,field2,...]>]\n"
4707 "\t [:size=#entries]\n"
4708 "\t [:pause][:continue][:clear]\n"
4709 "\t [:name=histname1]\n"
4710 "\t [if <filter>]\n\n"
4711 "\t When a matching event is hit, an entry is added to a hash\n"
4712 "\t table using the key(s) and value(s) named, and the value of a\n"
4713 "\t sum called 'hitcount' is incremented. Keys and values\n"
4714 "\t correspond to fields in the event's format description. Keys\n"
4715 "\t can be any field, or the special string 'stacktrace'.\n"
4716 "\t Compound keys consisting of up to two fields can be specified\n"
4717 "\t by the 'keys' keyword. Values must correspond to numeric\n"
4718 "\t fields. Sort keys consisting of up to two fields can be\n"
4719 "\t specified using the 'sort' keyword. The sort direction can\n"
4720 "\t be modified by appending '.descending' or '.ascending' to a\n"
4721 "\t sort field. The 'size' parameter can be used to specify more\n"
4722 "\t or fewer than the default 2048 entries for the hashtable size.\n"
4723 "\t If a hist trigger is given a name using the 'name' parameter,\n"
4724 "\t its histogram data will be shared with other triggers of the\n"
4725 "\t same name, and trigger hits will update this common data.\n\n"
4726 "\t Reading the 'hist' file for the event will dump the hash\n"
4727 "\t table in its entirety to stdout. If there are multiple hist\n"
4728 "\t triggers attached to an event, there will be a table for each\n"
4729 "\t trigger in the output. The table displayed for a named\n"
4730 "\t trigger will be the same as any other instance having the\n"
4731 "\t same name. The default format used to display a given field\n"
4732 "\t can be modified by appending any of the following modifiers\n"
4733 "\t to the field name, as applicable:\n\n"
4734 "\t .hex display a number as a hex value\n"
4735 "\t .sym display an address as a symbol\n"
4736 "\t .sym-offset display an address as a symbol and offset\n"
4737 "\t .execname display a common_pid as a program name\n"
4738 "\t .syscall display a syscall id as a syscall name\n\n"
4739 "\t .log2 display log2 value rather than raw number\n\n"
4740 "\t The 'pause' parameter can be used to pause an existing hist\n"
4741 "\t trigger or to start a hist trigger but not log any events\n"
4742 "\t until told to do so. 'continue' can be used to start or\n"
4743 "\t restart a paused hist trigger.\n\n"
4744 "\t The 'clear' parameter will clear the contents of a running\n"
4745 "\t hist trigger and leave its current paused/active state\n"
4746 "\t unchanged.\n\n"
4747 "\t The enable_hist and disable_hist triggers can be used to\n"
4748 "\t have one event conditionally start and stop another event's\n"
4749 "\t already-attached hist trigger. The syntax is analagous to\n"
4750 "\t the enable_event and disable_event triggers.\n"
4751 #endif
4752 ;
4753
4754 static ssize_t
4755 tracing_readme_read(struct file *filp, char __user *ubuf,
4756 size_t cnt, loff_t *ppos)
4757 {
4758 return simple_read_from_buffer(ubuf, cnt, ppos,
4759 readme_msg, strlen(readme_msg));
4760 }
4761
4762 static const struct file_operations tracing_readme_fops = {
4763 .open = tracing_open_generic,
4764 .read = tracing_readme_read,
4765 .llseek = generic_file_llseek,
4766 };
4767
4768 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4769 {
4770 int *ptr = v;
4771
4772 if (*pos || m->count)
4773 ptr++;
4774
4775 (*pos)++;
4776
4777 for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4778 if (trace_find_tgid(*ptr))
4779 return ptr;
4780 }
4781
4782 return NULL;
4783 }
4784
4785 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4786 {
4787 void *v;
4788 loff_t l = 0;
4789
4790 if (!tgid_map)
4791 return NULL;
4792
4793 v = &tgid_map[0];
4794 while (l <= *pos) {
4795 v = saved_tgids_next(m, v, &l);
4796 if (!v)
4797 return NULL;
4798 }
4799
4800 return v;
4801 }
4802
4803 static void saved_tgids_stop(struct seq_file *m, void *v)
4804 {
4805 }
4806
4807 static int saved_tgids_show(struct seq_file *m, void *v)
4808 {
4809 int pid = (int *)v - tgid_map;
4810
4811 seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4812 return 0;
4813 }
4814
4815 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4816 .start = saved_tgids_start,
4817 .stop = saved_tgids_stop,
4818 .next = saved_tgids_next,
4819 .show = saved_tgids_show,
4820 };
4821
4822 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4823 {
4824 if (tracing_disabled)
4825 return -ENODEV;
4826
4827 return seq_open(filp, &tracing_saved_tgids_seq_ops);
4828 }
4829
4830
4831 static const struct file_operations tracing_saved_tgids_fops = {
4832 .open = tracing_saved_tgids_open,
4833 .read = seq_read,
4834 .llseek = seq_lseek,
4835 .release = seq_release,
4836 };
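/*
 * Usage sketch for saved_tgids (assuming tracefs at /sys/kernel/tracing):
 * the file only has content once the record-tgid option has populated
 * the tgid_map.
 *
 *	# echo 1 > options/record-tgid
 *	# cat saved_tgids		# lines of "<pid> <tgid>"
 */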
4837
4838 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4839 {
4840 unsigned int *ptr = v;
4841
4842 if (*pos || m->count)
4843 ptr++;
4844
4845 (*pos)++;
4846
4847 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4848 ptr++) {
4849 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4850 continue;
4851
4852 return ptr;
4853 }
4854
4855 return NULL;
4856 }
4857
4858 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4859 {
4860 void *v;
4861 loff_t l = 0;
4862
4863 preempt_disable();
4864 arch_spin_lock(&trace_cmdline_lock);
4865
4866 v = &savedcmd->map_cmdline_to_pid[0];
4867 while (l <= *pos) {
4868 v = saved_cmdlines_next(m, v, &l);
4869 if (!v)
4870 return NULL;
4871 }
4872
4873 return v;
4874 }
4875
4876 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4877 {
4878 arch_spin_unlock(&trace_cmdline_lock);
4879 preempt_enable();
4880 }
4881
4882 static int saved_cmdlines_show(struct seq_file *m, void *v)
4883 {
4884 char buf[TASK_COMM_LEN];
4885 unsigned int *pid = v;
4886
4887 __trace_find_cmdline(*pid, buf);
4888 seq_printf(m, "%d %s\n", *pid, buf);
4889 return 0;
4890 }
4891
4892 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4893 .start = saved_cmdlines_start,
4894 .next = saved_cmdlines_next,
4895 .stop = saved_cmdlines_stop,
4896 .show = saved_cmdlines_show,
4897 };
4898
4899 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4900 {
4901 if (tracing_disabled)
4902 return -ENODEV;
4903
4904 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4905 }
4906
4907 static const struct file_operations tracing_saved_cmdlines_fops = {
4908 .open = tracing_saved_cmdlines_open,
4909 .read = seq_read,
4910 .llseek = seq_lseek,
4911 .release = seq_release,
4912 };
4913
4914 static ssize_t
4915 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4916 size_t cnt, loff_t *ppos)
4917 {
4918 char buf[64];
4919 int r;
4920
4921 arch_spin_lock(&trace_cmdline_lock);
4922 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4923 arch_spin_unlock(&trace_cmdline_lock);
4924
4925 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4926 }
4927
4928 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4929 {
4930 kfree(s->saved_cmdlines);
4931 kfree(s->map_cmdline_to_pid);
4932 kfree(s);
4933 }
4934
4935 static int tracing_resize_saved_cmdlines(unsigned int val)
4936 {
4937 struct saved_cmdlines_buffer *s, *savedcmd_temp;
4938
4939 s = kmalloc(sizeof(*s), GFP_KERNEL);
4940 if (!s)
4941 return -ENOMEM;
4942
4943 if (allocate_cmdlines_buffer(val, s) < 0) {
4944 kfree(s);
4945 return -ENOMEM;
4946 }
4947
4948 arch_spin_lock(&trace_cmdline_lock);
4949 savedcmd_temp = savedcmd;
4950 savedcmd = s;
4951 arch_spin_unlock(&trace_cmdline_lock);
4952 free_saved_cmdlines_buffer(savedcmd_temp);
4953
4954 return 0;
4955 }
4956
4957 static ssize_t
4958 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4959 size_t cnt, loff_t *ppos)
4960 {
4961 unsigned long val;
4962 int ret;
4963
4964 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4965 if (ret)
4966 return ret;
4967
4968 /* must have at least 1 entry and no more than PID_MAX_DEFAULT */
4969 if (!val || val > PID_MAX_DEFAULT)
4970 return -EINVAL;
4971
4972 ret = tracing_resize_saved_cmdlines((unsigned int)val);
4973 if (ret < 0)
4974 return ret;
4975
4976 *ppos += cnt;
4977
4978 return cnt;
4979 }
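/*
 * Usage sketch for saved_cmdlines_size (assuming tracefs at
 * /sys/kernel/tracing). Values outside 1..PID_MAX_DEFAULT are rejected
 * by the write handler above:
 *
 *	# cat saved_cmdlines_size	# default is 128
 *	# echo 1024 > saved_cmdlines_size
 */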
4980
4981 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4982 .open = tracing_open_generic,
4983 .read = tracing_saved_cmdlines_size_read,
4984 .write = tracing_saved_cmdlines_size_write,
4985 };
4986
4987 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4988 static union trace_eval_map_item *
4989 update_eval_map(union trace_eval_map_item *ptr)
4990 {
4991 if (!ptr->map.eval_string) {
4992 if (ptr->tail.next) {
4993 ptr = ptr->tail.next;
4994 /* Set ptr to the next real item (skip head) */
4995 ptr++;
4996 } else
4997 return NULL;
4998 }
4999 return ptr;
5000 }
5001
5002 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5003 {
5004 union trace_eval_map_item *ptr = v;
5005
5006 /*
5007 * Paranoid! If ptr points to end, we don't want to increment past it.
5008 * This really should never happen.
5009 */
5010 ptr = update_eval_map(ptr);
5011 if (WARN_ON_ONCE(!ptr))
5012 return NULL;
5013
5014 ptr++;
5015
5016 (*pos)++;
5017
5018 ptr = update_eval_map(ptr);
5019
5020 return ptr;
5021 }
5022
5023 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5024 {
5025 union trace_eval_map_item *v;
5026 loff_t l = 0;
5027
5028 mutex_lock(&trace_eval_mutex);
5029
5030 v = trace_eval_maps;
5031 if (v)
5032 v++;
5033
5034 while (v && l < *pos) {
5035 v = eval_map_next(m, v, &l);
5036 }
5037
5038 return v;
5039 }
5040
5041 static void eval_map_stop(struct seq_file *m, void *v)
5042 {
5043 mutex_unlock(&trace_eval_mutex);
5044 }
5045
5046 static int eval_map_show(struct seq_file *m, void *v)
5047 {
5048 union trace_eval_map_item *ptr = v;
5049
5050 seq_printf(m, "%s %ld (%s)\n",
5051 ptr->map.eval_string, ptr->map.eval_value,
5052 ptr->map.system);
5053
5054 return 0;
5055 }
5056
5057 static const struct seq_operations tracing_eval_map_seq_ops = {
5058 .start = eval_map_start,
5059 .next = eval_map_next,
5060 .stop = eval_map_stop,
5061 .show = eval_map_show,
5062 };
5063
5064 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5065 {
5066 if (tracing_disabled)
5067 return -ENODEV;
5068
5069 return seq_open(filp, &tracing_eval_map_seq_ops);
5070 }
5071
5072 static const struct file_operations tracing_eval_map_fops = {
5073 .open = tracing_eval_map_open,
5074 .read = seq_read,
5075 .llseek = seq_lseek,
5076 .release = seq_release,
5077 };
5078
5079 static inline union trace_eval_map_item *
5080 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5081 {
5082 /* Return tail of array given the head */
5083 return ptr + ptr->head.length + 1;
5084 }
5085
5086 static void
5087 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5088 int len)
5089 {
5090 struct trace_eval_map **stop;
5091 struct trace_eval_map **map;
5092 union trace_eval_map_item *map_array;
5093 union trace_eval_map_item *ptr;
5094
5095 stop = start + len;
5096
5097 /*
5098 * The trace_eval_maps contains the map plus a head and tail item,
5099 * where the head holds the module and length of array, and the
5100 * tail holds a pointer to the next list.
5101 */
5102 map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
5103 if (!map_array) {
5104 pr_warn("Unable to allocate trace eval mapping\n");
5105 return;
5106 }
5107
5108 mutex_lock(&trace_eval_mutex);
5109
5110 if (!trace_eval_maps)
5111 trace_eval_maps = map_array;
5112 else {
5113 ptr = trace_eval_maps;
5114 for (;;) {
5115 ptr = trace_eval_jmp_to_tail(ptr);
5116 if (!ptr->tail.next)
5117 break;
5118 ptr = ptr->tail.next;
5119
5120 }
5121 ptr->tail.next = map_array;
5122 }
5123 map_array->head.mod = mod;
5124 map_array->head.length = len;
5125 map_array++;
5126
5127 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5128 map_array->map = **map;
5129 map_array++;
5130 }
5131 memset(map_array, 0, sizeof(*map_array));
5132
5133 mutex_unlock(&trace_eval_mutex);
5134 }
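/*
 * A sketch of the map_array layout built above, for a module with
 * len == 3 eval maps (the head and tail are the two extra items
 * allocated beyond len):
 *
 *	[ head: mod, length=3 ][ map 0 ][ map 1 ][ map 2 ][ tail: next ]
 *
 * trace_eval_jmp_to_tail() steps "length + 1" items past the head to
 * land on the tail, which chains to the next module's array.
 */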
5135
5136 static void trace_create_eval_file(struct dentry *d_tracer)
5137 {
5138 trace_create_file("eval_map", 0444, d_tracer,
5139 NULL, &tracing_eval_map_fops);
5140 }
5141
5142 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5143 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5144 static inline void trace_insert_eval_map_file(struct module *mod,
5145 struct trace_eval_map **start, int len) { }
5146 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5147
5148 static void trace_insert_eval_map(struct module *mod,
5149 struct trace_eval_map **start, int len)
5150 {
5151 struct trace_eval_map **map;
5152
5153 if (len <= 0)
5154 return;
5155
5156 map = start;
5157
5158 trace_event_eval_update(map, len);
5159
5160 trace_insert_eval_map_file(mod, start, len);
5161 }
5162
5163 static ssize_t
5164 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5165 size_t cnt, loff_t *ppos)
5166 {
5167 struct trace_array *tr = filp->private_data;
5168 char buf[MAX_TRACER_SIZE+2];
5169 int r;
5170
5171 mutex_lock(&trace_types_lock);
5172 r = sprintf(buf, "%s\n", tr->current_trace->name);
5173 mutex_unlock(&trace_types_lock);
5174
5175 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5176 }
5177
5178 int tracer_init(struct tracer *t, struct trace_array *tr)
5179 {
5180 tracing_reset_online_cpus(&tr->trace_buffer);
5181 return t->init(tr);
5182 }
5183
5184 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5185 {
5186 int cpu;
5187
5188 for_each_tracing_cpu(cpu)
5189 per_cpu_ptr(buf->data, cpu)->entries = val;
5190 }
5191
5192 #ifdef CONFIG_TRACER_MAX_TRACE
5193 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5194 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5195 struct trace_buffer *size_buf, int cpu_id)
5196 {
5197 int cpu, ret = 0;
5198
5199 if (cpu_id == RING_BUFFER_ALL_CPUS) {
5200 for_each_tracing_cpu(cpu) {
5201 ret = ring_buffer_resize(trace_buf->buffer,
5202 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5203 if (ret < 0)
5204 break;
5205 per_cpu_ptr(trace_buf->data, cpu)->entries =
5206 per_cpu_ptr(size_buf->data, cpu)->entries;
5207 }
5208 } else {
5209 ret = ring_buffer_resize(trace_buf->buffer,
5210 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5211 if (ret == 0)
5212 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5213 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5214 }
5215
5216 return ret;
5217 }
5218 #endif /* CONFIG_TRACER_MAX_TRACE */
5219
5220 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5221 unsigned long size, int cpu)
5222 {
5223 int ret;
5224
5225 /*
5226 * If kernel or user changes the size of the ring buffer
5227 * we use the size that was given, and we can forget about
5228 * expanding it later.
5229 */
5230 ring_buffer_expanded = true;
5231
5232 /* May be called before buffers are initialized */
5233 if (!tr->trace_buffer.buffer)
5234 return 0;
5235
5236 ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5237 if (ret < 0)
5238 return ret;
5239
5240 #ifdef CONFIG_TRACER_MAX_TRACE
5241 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5242 !tr->current_trace->use_max_tr)
5243 goto out;
5244
5245 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5246 if (ret < 0) {
5247 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5248 &tr->trace_buffer, cpu);
5249 if (r < 0) {
5250 /*
5251 * AARGH! We are left with different
5252 * size max buffer!!!!
5253 * The max buffer is our "snapshot" buffer.
5254 * When a tracer needs a snapshot (one of the
5255 * latency tracers), it swaps the max buffer
5256 * with the saved snapshot. We succeeded in updating
5257 * the size of the main buffer, but failed to
5258 * update the size of the max buffer. But when we tried
5259 * to reset the main buffer to the original size, we
5260 * failed there too. This is very unlikely to
5261 * happen, but if it does, warn and kill all
5262 * tracing.
5263 */
5264 WARN_ON(1);
5265 tracing_disabled = 1;
5266 }
5267 return ret;
5268 }
5269
5270 if (cpu == RING_BUFFER_ALL_CPUS)
5271 set_buffer_entries(&tr->max_buffer, size);
5272 else
5273 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5274
5275 out:
5276 #endif /* CONFIG_TRACER_MAX_TRACE */
5277
5278 if (cpu == RING_BUFFER_ALL_CPUS)
5279 set_buffer_entries(&tr->trace_buffer, size);
5280 else
5281 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5282
5283 return ret;
5284 }
5285
5286 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5287 unsigned long size, int cpu_id)
5288 {
5289 int ret = size;
5290
5291 mutex_lock(&trace_types_lock);
5292
5293 if (cpu_id != RING_BUFFER_ALL_CPUS) {
5294 /* make sure, this cpu is enabled in the mask */
5295 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5296 ret = -EINVAL;
5297 goto out;
5298 }
5299 }
5300
5301 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5302 if (ret < 0)
5303 ret = -ENOMEM;
5304
5305 out:
5306 mutex_unlock(&trace_types_lock);
5307
5308 return ret;
5309 }
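/*
 * Example of resizing through the tracefs files that end up here
 * (assuming tracefs at /sys/kernel/tracing; sizes are in KB per CPU):
 *
 *	# echo 4096 > buffer_size_kb			# resize every CPU buffer
 *	# echo 4096 > per_cpu/cpu1/buffer_size_kb	# resize only CPU 1
 */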
5310
5311
5312 /**
5313 * tracing_update_buffers - used by tracing facility to expand ring buffers
5314 *
5315 * To save memory when tracing is never used on a system that has it
5316 * configured in, the ring buffers are set to a minimum size. Once
5317 * a user starts to use the tracing facility, they need to grow
5318 * to their default size.
5319 *
5320 * This function is to be called when a tracer is about to be used.
5321 */
5322 int tracing_update_buffers(void)
5323 {
5324 int ret = 0;
5325
5326 mutex_lock(&trace_types_lock);
5327 if (!ring_buffer_expanded)
5328 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5329 RING_BUFFER_ALL_CPUS);
5330 mutex_unlock(&trace_types_lock);
5331
5332 return ret;
5333 }
5334
5335 struct trace_option_dentry;
5336
5337 static void
5338 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5339
5340 /*
5341 * Used to clear out the tracer before deletion of an instance.
5342 * Must have trace_types_lock held.
5343 */
5344 static void tracing_set_nop(struct trace_array *tr)
5345 {
5346 if (tr->current_trace == &nop_trace)
5347 return;
5348
5349 tr->current_trace->enabled--;
5350
5351 if (tr->current_trace->reset)
5352 tr->current_trace->reset(tr);
5353
5354 tr->current_trace = &nop_trace;
5355 }
5356
5357 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5358 {
5359 /* Only enable if the directory has been created already. */
5360 if (!tr->dir)
5361 return;
5362
5363 create_trace_option_files(tr, t);
5364 }
5365
5366 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5367 {
5368 struct tracer *t;
5369 #ifdef CONFIG_TRACER_MAX_TRACE
5370 bool had_max_tr;
5371 #endif
5372 int ret = 0;
5373
5374 mutex_lock(&trace_types_lock);
5375
5376 if (!ring_buffer_expanded) {
5377 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5378 RING_BUFFER_ALL_CPUS);
5379 if (ret < 0)
5380 goto out;
5381 ret = 0;
5382 }
5383
5384 for (t = trace_types; t; t = t->next) {
5385 if (strcmp(t->name, buf) == 0)
5386 break;
5387 }
5388 if (!t) {
5389 ret = -EINVAL;
5390 goto out;
5391 }
5392 if (t == tr->current_trace)
5393 goto out;
5394
5395 /* Some tracers won't work on kernel command line */
5396 if (system_state < SYSTEM_RUNNING && t->noboot) {
5397 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5398 t->name);
5399 goto out;
5400 }
5401
5402 /* Some tracers are only allowed for the top level buffer */
5403 if (!trace_ok_for_array(t, tr)) {
5404 ret = -EINVAL;
5405 goto out;
5406 }
5407
5408 /* If trace pipe files are being read, we can't change the tracer */
5409 if (tr->current_trace->ref) {
5410 ret = -EBUSY;
5411 goto out;
5412 }
5413
5414 trace_branch_disable();
5415
5416 tr->current_trace->enabled--;
5417
5418 if (tr->current_trace->reset)
5419 tr->current_trace->reset(tr);
5420
5421 /* Current trace needs to be nop_trace before synchronize_sched */
5422 tr->current_trace = &nop_trace;
5423
5424 #ifdef CONFIG_TRACER_MAX_TRACE
5425 had_max_tr = tr->allocated_snapshot;
5426
5427 if (had_max_tr && !t->use_max_tr) {
5428 /*
5429 * We need to make sure that the update_max_tr sees that
5430 * current_trace changed to nop_trace to keep it from
5431 * swapping the buffers after we resize it.
5432 * The update_max_tr is called from interrupts disabled
5433 * so a synchronize_sched() is sufficient.
5434 */
5435 synchronize_sched();
5436 free_snapshot(tr);
5437 }
5438 #endif
5439
5440 #ifdef CONFIG_TRACER_MAX_TRACE
5441 if (t->use_max_tr && !had_max_tr) {
5442 ret = tracing_alloc_snapshot_instance(tr);
5443 if (ret < 0)
5444 goto out;
5445 }
5446 #endif
5447
5448 if (t->init) {
5449 ret = tracer_init(t, tr);
5450 if (ret)
5451 goto out;
5452 }
5453
5454 tr->current_trace = t;
5455 tr->current_trace->enabled++;
5456 trace_branch_enable(tr);
5457 out:
5458 mutex_unlock(&trace_types_lock);
5459
5460 return ret;
5461 }
5462
5463 static ssize_t
5464 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5465 size_t cnt, loff_t *ppos)
5466 {
5467 struct trace_array *tr = filp->private_data;
5468 char buf[MAX_TRACER_SIZE+1];
5469 int i;
5470 size_t ret;
5471 int err;
5472
5473 ret = cnt;
5474
5475 if (cnt > MAX_TRACER_SIZE)
5476 cnt = MAX_TRACER_SIZE;
5477
5478 if (copy_from_user(buf, ubuf, cnt))
5479 return -EFAULT;
5480
5481 buf[cnt] = 0;
5482
5483 /* strip ending whitespace. */
5484 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5485 buf[i] = 0;
5486
5487 err = tracing_set_tracer(tr, buf);
5488 if (err)
5489 return err;
5490
5491 *ppos += ret;
5492
5493 return ret;
5494 }
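/*
 * Example of switching tracers through the current_tracer file backed by
 * the write handler above (assuming tracefs at /sys/kernel/tracing):
 *
 *	# cat available_tracers		# e.g. "function_graph function nop"
 *	# echo function > current_tracer
 *	# echo nop > current_tracer	# back to no tracer
 */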
5495
5496 static ssize_t
5497 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5498 size_t cnt, loff_t *ppos)
5499 {
5500 char buf[64];
5501 int r;
5502
5503 r = snprintf(buf, sizeof(buf), "%ld\n",
5504 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5505 if (r > sizeof(buf))
5506 r = sizeof(buf);
5507 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5508 }
5509
5510 static ssize_t
5511 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5512 size_t cnt, loff_t *ppos)
5513 {
5514 unsigned long val;
5515 int ret;
5516
5517 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5518 if (ret)
5519 return ret;
5520
5521 *ptr = val * 1000;
5522
5523 return cnt;
5524 }
5525
5526 static ssize_t
5527 tracing_thresh_read(struct file *filp, char __user *ubuf,
5528 size_t cnt, loff_t *ppos)
5529 {
5530 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5531 }
5532
5533 static ssize_t
5534 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5535 size_t cnt, loff_t *ppos)
5536 {
5537 struct trace_array *tr = filp->private_data;
5538 int ret;
5539
5540 mutex_lock(&trace_types_lock);
5541 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5542 if (ret < 0)
5543 goto out;
5544
5545 if (tr->current_trace->update_thresh) {
5546 ret = tr->current_trace->update_thresh(tr);
5547 if (ret < 0)
5548 goto out;
5549 }
5550
5551 ret = cnt;
5552 out:
5553 mutex_unlock(&trace_types_lock);
5554
5555 return ret;
5556 }
5557
5558 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5559
5560 static ssize_t
5561 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5562 size_t cnt, loff_t *ppos)
5563 {
5564 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5565 }
5566
5567 static ssize_t
5568 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5569 size_t cnt, loff_t *ppos)
5570 {
5571 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5572 }
5573
5574 #endif
5575
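/*
 * Open handler for the trace_pipe files: allocate a consuming iterator,
 * mark every CPU as already "started" so the pipe does not replay the
 * beginning of the buffer, and bump current_trace->ref so the tracer
 * cannot be switched while the pipe is held open (see the -EBUSY check
 * in tracing_set_tracer()).
 */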
5576 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5577 {
5578 struct trace_array *tr = inode->i_private;
5579 struct trace_iterator *iter;
5580 int ret = 0;
5581
5582 if (tracing_disabled)
5583 return -ENODEV;
5584
5585 if (trace_array_get(tr) < 0)
5586 return -ENODEV;
5587
5588 mutex_lock(&trace_types_lock);
5589
5590 /* create a buffer to store the information to pass to userspace */
5591 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5592 if (!iter) {
5593 ret = -ENOMEM;
5594 __trace_array_put(tr);
5595 goto out;
5596 }
5597
5598 trace_seq_init(&iter->seq);
5599 iter->trace = tr->current_trace;
5600
5601 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5602 ret = -ENOMEM;
5603 goto fail;
5604 }
5605
5606 /* trace pipe does not show start of buffer */
5607 cpumask_setall(iter->started);
5608
5609 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5610 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5611
5612 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5613 if (trace_clocks[tr->clock_id].in_ns)
5614 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5615
5616 iter->tr = tr;
5617 iter->trace_buffer = &tr->trace_buffer;
5618 iter->cpu_file = tracing_get_cpu(inode);
5619 mutex_init(&iter->mutex);
5620 filp->private_data = iter;
5621
5622 if (iter->trace->pipe_open)
5623 iter->trace->pipe_open(iter);
5624
5625 nonseekable_open(inode, filp);
5626
5627 tr->current_trace->ref++;
5628 out:
5629 mutex_unlock(&trace_types_lock);
5630 return ret;
5631
5632 fail:
5633 kfree(iter);
5634 __trace_array_put(tr);
5635 mutex_unlock(&trace_types_lock);
5636 return ret;
5637 }
5638
5639 static int tracing_release_pipe(struct inode *inode, struct file *file)
5640 {
5641 struct trace_iterator *iter = file->private_data;
5642 struct trace_array *tr = inode->i_private;
5643
5644 mutex_lock(&trace_types_lock);
5645
5646 tr->current_trace->ref--;
5647
5648 if (iter->trace->pipe_close)
5649 iter->trace->pipe_close(iter);
5650
5651 mutex_unlock(&trace_types_lock);
5652
5653 free_cpumask_var(iter->started);
5654 mutex_destroy(&iter->mutex);
5655 kfree(iter);
5656
5657 trace_array_put(tr);
5658
5659 return 0;
5660 }
5661
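/*
 * Poll support shared by trace_pipe and the per-cpu buffer files.
 * A static (non-consuming) iterator is always reported readable; in
 * blocking mode we also always report readable, otherwise readiness is
 * delegated to ring_buffer_poll_wait().
 */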
5662 static unsigned int
5663 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5664 {
5665 struct trace_array *tr = iter->tr;
5666
5667 /* Iterators are static, they should be filled or empty */
5668 if (trace_buffer_iter(iter, iter->cpu_file))
5669 return POLLIN | POLLRDNORM;
5670
5671 if (tr->trace_flags & TRACE_ITER_BLOCK)
5672 /*
5673 * Always select as readable when in blocking mode
5674 */
5675 return POLLIN | POLLRDNORM;
5676 else
5677 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5678 filp, poll_table);
5679 }
5680
5681 static unsigned int
5682 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5683 {
5684 struct trace_iterator *iter = filp->private_data;
5685
5686 return trace_poll(iter, filp, poll_table);
5687 }
5688
5689 /* Must be called with iter->mutex held. */
5690 static int tracing_wait_pipe(struct file *filp)
5691 {
5692 struct trace_iterator *iter = filp->private_data;
5693 int ret;
5694
5695 while (trace_empty(iter)) {
5696
5697 if ((filp->f_flags & O_NONBLOCK)) {
5698 return -EAGAIN;
5699 }
5700
5701 /*
5702 * We only return EOF once we have read something and tracing
5703 * has been disabled. If tracing is disabled but nothing has been
5704 * read yet, keep blocking. This allows a user to cat this file,
5705 * and then enable tracing. But after we have read something,
5706 * we give an EOF when tracing is disabled again.
5707 *
5708 * iter->pos will be 0 if we haven't read anything.
5709 */
5710 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5711 break;
5712
5713 mutex_unlock(&iter->mutex);
5714
5715 ret = wait_on_pipe(iter, false);
5716
5717 mutex_lock(&iter->mutex);
5718
5719 if (ret)
5720 return ret;
5721 }
5722
5723 return 1;
5724 }
5725
5726 /*
5727 * Consumer reader.
5728 */
5729 static ssize_t
5730 tracing_read_pipe(struct file *filp, char __user *ubuf,
5731 size_t cnt, loff_t *ppos)
5732 {
5733 struct trace_iterator *iter = filp->private_data;
5734 ssize_t sret;
5735
5736 /*
5737 * Avoid more than one consumer on a single file descriptor
5738 * This is just a matter of trace coherency; the ring buffer itself
5739 * is protected.
5740 */
5741 mutex_lock(&iter->mutex);
5742
5743 /* return any leftover data */
5744 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5745 if (sret != -EBUSY)
5746 goto out;
5747
5748 trace_seq_init(&iter->seq);
5749
5750 if (iter->trace->read) {
5751 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5752 if (sret)
5753 goto out;
5754 }
5755
5756 waitagain:
5757 sret = tracing_wait_pipe(filp);
5758 if (sret <= 0)
5759 goto out;
5760
5761 /* stop when tracing is finished */
5762 if (trace_empty(iter)) {
5763 sret = 0;
5764 goto out;
5765 }
5766
5767 if (cnt >= PAGE_SIZE)
5768 cnt = PAGE_SIZE - 1;
5769
5770 /* reset all but tr, trace, and overruns */
5771 memset(&iter->seq, 0,
5772 sizeof(struct trace_iterator) -
5773 offsetof(struct trace_iterator, seq));
5774 cpumask_clear(iter->started);
5775 trace_seq_init(&iter->seq);
5776 iter->pos = -1;
5777
5778 trace_event_read_lock();
5779 trace_access_lock(iter->cpu_file);
5780 while (trace_find_next_entry_inc(iter) != NULL) {
5781 enum print_line_t ret;
5782 int save_len = iter->seq.seq.len;
5783
5784 ret = print_trace_line(iter);
5785 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5786 /* don't print partial lines */
5787 iter->seq.seq.len = save_len;
5788 break;
5789 }
5790 if (ret != TRACE_TYPE_NO_CONSUME)
5791 trace_consume(iter);
5792
5793 if (trace_seq_used(&iter->seq) >= cnt)
5794 break;
5795
5796 /*
5797 * Setting the full flag means we reached the trace_seq buffer
5798 * size and should have exited via the partial-line check above.
5799 * One of the trace_seq_* functions is not being used properly.
5800 */
5801 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5802 iter->ent->type);
5803 }
5804 trace_access_unlock(iter->cpu_file);
5805 trace_event_read_unlock();
5806
5807 /* Now copy what we have to the user */
5808 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5809 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5810 trace_seq_init(&iter->seq);
5811
5812 /*
5813 * If there was nothing to send to user, in spite of consuming trace
5814 * entries, go back to wait for more entries.
5815 */
5816 if (sret == -EBUSY)
5817 goto waitagain;
5818
5819 out:
5820 mutex_unlock(&iter->mutex);
5821
5822 return sret;
5823 }
5824
5825 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5826 unsigned int idx)
5827 {
5828 __free_page(spd->pages[idx]);
5829 }
5830
5831 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5832 .can_merge = 0,
5833 .confirm = generic_pipe_buf_confirm,
5834 .release = generic_pipe_buf_release,
5835 .steal = generic_pipe_buf_steal,
5836 .get = generic_pipe_buf_get,
5837 };
5838
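/*
 * Format trace entries into iter->seq until roughly one page is filled
 * (or the seq buffer would overflow, or we run out of entries).
 * Returns how many bytes of the splice request are still unfulfilled.
 */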
5839 static size_t
5840 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5841 {
5842 size_t count;
5843 int save_len;
5844 int ret;
5845
5846 /* Seq buffer is page-sized, exactly what we need. */
5847 for (;;) {
5848 save_len = iter->seq.seq.len;
5849 ret = print_trace_line(iter);
5850
5851 if (trace_seq_has_overflowed(&iter->seq)) {
5852 iter->seq.seq.len = save_len;
5853 break;
5854 }
5855
5856 /*
5857 * This should not be hit, because it should only
5858 * be set if the iter->seq overflowed. But check it
5859 * anyway to be safe.
5860 */
5861 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5862 iter->seq.seq.len = save_len;
5863 break;
5864 }
5865
5866 count = trace_seq_used(&iter->seq) - save_len;
5867 if (rem < count) {
5868 rem = 0;
5869 iter->seq.seq.len = save_len;
5870 break;
5871 }
5872
5873 if (ret != TRACE_TYPE_NO_CONSUME)
5874 trace_consume(iter);
5875 rem -= count;
5876 if (!trace_find_next_entry_inc(iter)) {
5877 rem = 0;
5878 iter->ent = NULL;
5879 break;
5880 }
5881 }
5882
5883 return rem;
5884 }
5885
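/*
 * splice_read handler for trace_pipe: wait for data, then fill up to
 * PIPE_DEF_BUFFERS freshly allocated pages with formatted trace output
 * and hand them to splice_to_pipe(). Unlike the trace_pipe_raw path
 * later in this file, the data is copied rather than zero-copied.
 */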
5886 static ssize_t tracing_splice_read_pipe(struct file *filp,
5887 loff_t *ppos,
5888 struct pipe_inode_info *pipe,
5889 size_t len,
5890 unsigned int flags)
5891 {
5892 struct page *pages_def[PIPE_DEF_BUFFERS];
5893 struct partial_page partial_def[PIPE_DEF_BUFFERS];
5894 struct trace_iterator *iter = filp->private_data;
5895 struct splice_pipe_desc spd = {
5896 .pages = pages_def,
5897 .partial = partial_def,
5898 .nr_pages = 0, /* This gets updated below. */
5899 .nr_pages_max = PIPE_DEF_BUFFERS,
5900 .ops = &tracing_pipe_buf_ops,
5901 .spd_release = tracing_spd_release_pipe,
5902 };
5903 ssize_t ret;
5904 size_t rem;
5905 unsigned int i;
5906
5907 if (splice_grow_spd(pipe, &spd))
5908 return -ENOMEM;
5909
5910 mutex_lock(&iter->mutex);
5911
5912 if (iter->trace->splice_read) {
5913 ret = iter->trace->splice_read(iter, filp,
5914 ppos, pipe, len, flags);
5915 if (ret)
5916 goto out_err;
5917 }
5918
5919 ret = tracing_wait_pipe(filp);
5920 if (ret <= 0)
5921 goto out_err;
5922
5923 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5924 ret = -EFAULT;
5925 goto out_err;
5926 }
5927
5928 trace_event_read_lock();
5929 trace_access_lock(iter->cpu_file);
5930
5931 /* Fill as many pages as possible. */
5932 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5933 spd.pages[i] = alloc_page(GFP_KERNEL);
5934 if (!spd.pages[i])
5935 break;
5936
5937 rem = tracing_fill_pipe_page(rem, iter);
5938
5939 /* Copy the data into the page, so we can start over. */
5940 ret = trace_seq_to_buffer(&iter->seq,
5941 page_address(spd.pages[i]),
5942 trace_seq_used(&iter->seq));
5943 if (ret < 0) {
5944 __free_page(spd.pages[i]);
5945 break;
5946 }
5947 spd.partial[i].offset = 0;
5948 spd.partial[i].len = trace_seq_used(&iter->seq);
5949
5950 trace_seq_init(&iter->seq);
5951 }
5952
5953 trace_access_unlock(iter->cpu_file);
5954 trace_event_read_unlock();
5955 mutex_unlock(&iter->mutex);
5956
5957 spd.nr_pages = i;
5958
5959 if (i)
5960 ret = splice_to_pipe(pipe, &spd);
5961 else
5962 ret = 0;
5963 out:
5964 splice_shrink_spd(&spd);
5965 return ret;
5966
5967 out_err:
5968 mutex_unlock(&iter->mutex);
5969 goto out;
5970 }
5971
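/*
 * Read handler for buffer_size_kb. For the "all CPUs" file, print a
 * single size when every per-cpu buffer has the same size (annotated
 * with the target size while the ring buffer is still at its boot-time
 * minimum), or "X" when the per-cpu sizes differ. Per-cpu files print
 * that CPU's size. All values are in KiB.
 */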
5972 static ssize_t
5973 tracing_entries_read(struct file *filp, char __user *ubuf,
5974 size_t cnt, loff_t *ppos)
5975 {
5976 struct inode *inode = file_inode(filp);
5977 struct trace_array *tr = inode->i_private;
5978 int cpu = tracing_get_cpu(inode);
5979 char buf[64];
5980 int r = 0;
5981 ssize_t ret;
5982
5983 mutex_lock(&trace_types_lock);
5984
5985 if (cpu == RING_BUFFER_ALL_CPUS) {
5986 int cpu, buf_size_same;
5987 unsigned long size;
5988
5989 size = 0;
5990 buf_size_same = 1;
5991 /* check if all cpu sizes are same */
5992 for_each_tracing_cpu(cpu) {
5993 /* fill in the size from first enabled cpu */
5994 if (size == 0)
5995 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5996 if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5997 buf_size_same = 0;
5998 break;
5999 }
6000 }
6001
6002 if (buf_size_same) {
6003 if (!ring_buffer_expanded)
6004 r = sprintf(buf, "%lu (expanded: %lu)\n",
6005 size >> 10,
6006 trace_buf_size >> 10);
6007 else
6008 r = sprintf(buf, "%lu\n", size >> 10);
6009 } else
6010 r = sprintf(buf, "X\n");
6011 } else
6012 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6013
6014 mutex_unlock(&trace_types_lock);
6015
6016 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6017 return ret;
6018 }
6019
6020 static ssize_t
6021 tracing_entries_write(struct file *filp, const char __user *ubuf,
6022 size_t cnt, loff_t *ppos)
6023 {
6024 struct inode *inode = file_inode(filp);
6025 struct trace_array *tr = inode->i_private;
6026 unsigned long val;
6027 int ret;
6028
6029 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6030 if (ret)
6031 return ret;
6032
6033 /* must have at least 1 entry */
6034 if (!val)
6035 return -EINVAL;
6036
6037 /* value is in KB */
6038 val <<= 10;
6039 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6040 if (ret < 0)
6041 return ret;
6042
6043 *ppos += cnt;
6044
6045 return cnt;
6046 }
6047
6048 static ssize_t
6049 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6050 size_t cnt, loff_t *ppos)
6051 {
6052 struct trace_array *tr = filp->private_data;
6053 char buf[64];
6054 int r, cpu;
6055 unsigned long size = 0, expanded_size = 0;
6056
6057 mutex_lock(&trace_types_lock);
6058 for_each_tracing_cpu(cpu) {
6059 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6060 if (!ring_buffer_expanded)
6061 expanded_size += trace_buf_size >> 10;
6062 }
6063 if (ring_buffer_expanded)
6064 r = sprintf(buf, "%lu\n", size);
6065 else
6066 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6067 mutex_unlock(&trace_types_lock);
6068
6069 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6070 }
6071
6072 static ssize_t
6073 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6074 size_t cnt, loff_t *ppos)
6075 {
6076 /*
6077 * There is no need to read what the user has written; this function
6078 * exists just so that an "echo" into the file does not return an error
6079 */
6080
6081 *ppos += cnt;
6082
6083 return cnt;
6084 }
6085
6086 static int
6087 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6088 {
6089 struct trace_array *tr = inode->i_private;
6090
6091 /* disable tracing ? */
6092 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6093 tracer_tracing_off(tr);
6094 /* resize the ring buffer to 0 */
6095 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6096
6097 trace_array_put(tr);
6098
6099 return 0;
6100 }
6101
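/*
 * Write handler for the user-space trace marker (usually exposed as the
 * tracefs "trace_marker" file, though that file is created elsewhere).
 * The user string is injected into the ring buffer as a TRACE_PRINT
 * event; if the copy from user space faults, the literal string
 * "<faulted>" is recorded and -EFAULT is returned. Typical usage from a
 * shell would be something like:
 *
 *	echo "hello from user space" > trace_marker
 */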
6102 static ssize_t
6103 tracing_mark_write(struct file *filp, const char __user *ubuf,
6104 size_t cnt, loff_t *fpos)
6105 {
6106 struct trace_array *tr = filp->private_data;
6107 struct ring_buffer_event *event;
6108 struct ring_buffer *buffer;
6109 struct print_entry *entry;
6110 unsigned long irq_flags;
6111 const char faulted[] = "<faulted>";
6112 ssize_t written;
6113 int size;
6114 int len;
6115
6116 /* Used in tracing_mark_raw_write() as well */
6117 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6118
6119 if (tracing_disabled)
6120 return -EINVAL;
6121
6122 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6123 return -EINVAL;
6124
6125 if (cnt > TRACE_BUF_SIZE)
6126 cnt = TRACE_BUF_SIZE;
6127
6128 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6129
6130 local_save_flags(irq_flags);
6131 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6132
6133 /* If less than "<faulted>", then make sure we can still add that */
6134 if (cnt < FAULTED_SIZE)
6135 size += FAULTED_SIZE - cnt;
6136
6137 buffer = tr->trace_buffer.buffer;
6138 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6139 irq_flags, preempt_count());
6140 if (unlikely(!event))
6141 /* Ring buffer disabled, return as if not open for write */
6142 return -EBADF;
6143
6144 entry = ring_buffer_event_data(event);
6145 entry->ip = _THIS_IP_;
6146
6147 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6148 if (len) {
6149 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6150 cnt = FAULTED_SIZE;
6151 written = -EFAULT;
6152 } else
6153 written = cnt;
6154 len = cnt;
6155
6156 if (entry->buf[cnt - 1] != '\n') {
6157 entry->buf[cnt] = '\n';
6158 entry->buf[cnt + 1] = '\0';
6159 } else
6160 entry->buf[cnt] = '\0';
6161
6162 __buffer_unlock_commit(buffer, event);
6163
6164 if (written > 0)
6165 *fpos += written;
6166
6167 return written;
6168 }
6169
6170 /* Limit it for now to 3K (including tag) */
6171 #define RAW_DATA_MAX_SIZE (1024*3)
6172
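/*
 * Raw counterpart of tracing_mark_write() (usually exposed as a
 * "trace_marker_raw" style file): the payload must begin with an
 * unsigned int tag id followed by opaque binary data, and is recorded
 * verbatim as a TRACE_RAW_DATA event. On a faulted copy the id is set
 * to -1 and "<faulted>" is stored in the data area.
 */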
6173 static ssize_t
6174 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6175 size_t cnt, loff_t *fpos)
6176 {
6177 struct trace_array *tr = filp->private_data;
6178 struct ring_buffer_event *event;
6179 struct ring_buffer *buffer;
6180 struct raw_data_entry *entry;
6181 const char faulted[] = "<faulted>";
6182 unsigned long irq_flags;
6183 ssize_t written;
6184 int size;
6185 int len;
6186
6187 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6188
6189 if (tracing_disabled)
6190 return -EINVAL;
6191
6192 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6193 return -EINVAL;
6194
6195 /* The marker must at least have a tag id */
6196 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6197 return -EINVAL;
6198
6199 if (cnt > TRACE_BUF_SIZE)
6200 cnt = TRACE_BUF_SIZE;
6201
6202 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6203
6204 local_save_flags(irq_flags);
6205 size = sizeof(*entry) + cnt;
6206 if (cnt < FAULT_SIZE_ID)
6207 size += FAULT_SIZE_ID - cnt;
6208
6209 buffer = tr->trace_buffer.buffer;
6210 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6211 irq_flags, preempt_count());
6212 if (!event)
6213 /* Ring buffer disabled, return as if not open for write */
6214 return -EBADF;
6215
6216 entry = ring_buffer_event_data(event);
6217
6218 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6219 if (len) {
6220 entry->id = -1;
6221 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6222 written = -EFAULT;
6223 } else
6224 written = cnt;
6225
6226 __buffer_unlock_commit(buffer, event);
6227
6228 if (written > 0)
6229 *fpos += written;
6230
6231 return written;
6232 }
6233
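/*
 * seq_file handlers for the trace clock selection file: the show method
 * lists all available clocks with the active one in brackets, and
 * tracing_set_clock() below switches clocks, resetting the buffers
 * since timestamps from different clocks cannot be compared.
 */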
6234 static int tracing_clock_show(struct seq_file *m, void *v)
6235 {
6236 struct trace_array *tr = m->private;
6237 int i;
6238
6239 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6240 seq_printf(m,
6241 "%s%s%s%s", i ? " " : "",
6242 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6243 i == tr->clock_id ? "]" : "");
6244 seq_putc(m, '\n');
6245
6246 return 0;
6247 }
6248
6249 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6250 {
6251 int i;
6252
6253 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6254 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6255 break;
6256 }
6257 if (i == ARRAY_SIZE(trace_clocks))
6258 return -EINVAL;
6259
6260 mutex_lock(&trace_types_lock);
6261
6262 tr->clock_id = i;
6263
6264 ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6265
6266 /*
6267 * New clock may not be consistent with the previous clock.
6268 * Reset the buffer so that it doesn't have incomparable timestamps.
6269 */
6270 tracing_reset_online_cpus(&tr->trace_buffer);
6271
6272 #ifdef CONFIG_TRACER_MAX_TRACE
6273 if (tr->max_buffer.buffer)
6274 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6275 tracing_reset_online_cpus(&tr->max_buffer);
6276 #endif
6277
6278 mutex_unlock(&trace_types_lock);
6279
6280 return 0;
6281 }
6282
6283 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6284 size_t cnt, loff_t *fpos)
6285 {
6286 struct seq_file *m = filp->private_data;
6287 struct trace_array *tr = m->private;
6288 char buf[64];
6289 const char *clockstr;
6290 int ret;
6291
6292 if (cnt >= sizeof(buf))
6293 return -EINVAL;
6294
6295 if (copy_from_user(buf, ubuf, cnt))
6296 return -EFAULT;
6297
6298 buf[cnt] = 0;
6299
6300 clockstr = strstrip(buf);
6301
6302 ret = tracing_set_clock(tr, clockstr);
6303 if (ret)
6304 return ret;
6305
6306 *fpos += cnt;
6307
6308 return cnt;
6309 }
6310
6311 static int tracing_clock_open(struct inode *inode, struct file *file)
6312 {
6313 struct trace_array *tr = inode->i_private;
6314 int ret;
6315
6316 if (tracing_disabled)
6317 return -ENODEV;
6318
6319 if (trace_array_get(tr))
6320 return -ENODEV;
6321
6322 ret = single_open(file, tracing_clock_show, inode->i_private);
6323 if (ret < 0)
6324 trace_array_put(tr);
6325
6326 return ret;
6327 }
6328
6329 struct ftrace_buffer_info {
6330 struct trace_iterator iter;
6331 void *spare;
6332 unsigned int spare_cpu;
6333 unsigned int read;
6334 };
6335
6336 #ifdef CONFIG_TRACER_SNAPSHOT
6337 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6338 {
6339 struct trace_array *tr = inode->i_private;
6340 struct trace_iterator *iter;
6341 struct seq_file *m;
6342 int ret = 0;
6343
6344 if (trace_array_get(tr) < 0)
6345 return -ENODEV;
6346
6347 if (file->f_mode & FMODE_READ) {
6348 iter = __tracing_open(inode, file, true);
6349 if (IS_ERR(iter))
6350 ret = PTR_ERR(iter);
6351 } else {
6352 /* Writes still need the seq_file to hold the private data */
6353 ret = -ENOMEM;
6354 m = kzalloc(sizeof(*m), GFP_KERNEL);
6355 if (!m)
6356 goto out;
6357 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6358 if (!iter) {
6359 kfree(m);
6360 goto out;
6361 }
6362 ret = 0;
6363
6364 iter->tr = tr;
6365 iter->trace_buffer = &tr->max_buffer;
6366 iter->cpu_file = tracing_get_cpu(inode);
6367 m->private = iter;
6368 file->private_data = m;
6369 }
6370 out:
6371 if (ret < 0)
6372 trace_array_put(tr);
6373
6374 return ret;
6375 }
6376
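/*
 * Write handler for the snapshot file. The value written selects the
 * action: 0 frees the snapshot buffer (only for the all-CPUs file),
 * 1 allocates the snapshot buffer if needed and swaps it with the live
 * buffer, and any other value simply clears the snapshot buffer's
 * contents.
 */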
6377 static ssize_t
6378 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6379 loff_t *ppos)
6380 {
6381 struct seq_file *m = filp->private_data;
6382 struct trace_iterator *iter = m->private;
6383 struct trace_array *tr = iter->tr;
6384 unsigned long val;
6385 int ret;
6386
6387 ret = tracing_update_buffers();
6388 if (ret < 0)
6389 return ret;
6390
6391 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6392 if (ret)
6393 return ret;
6394
6395 mutex_lock(&trace_types_lock);
6396
6397 if (tr->current_trace->use_max_tr) {
6398 ret = -EBUSY;
6399 goto out;
6400 }
6401
6402 switch (val) {
6403 case 0:
6404 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6405 ret = -EINVAL;
6406 break;
6407 }
6408 if (tr->allocated_snapshot)
6409 free_snapshot(tr);
6410 break;
6411 case 1:
6412 /* Only allow per-cpu swap if the ring buffer supports it */
6413 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6414 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6415 ret = -EINVAL;
6416 break;
6417 }
6418 #endif
6419 if (tr->allocated_snapshot)
6420 ret = resize_buffer_duplicate_size(&tr->max_buffer,
6421 &tr->trace_buffer, iter->cpu_file);
6422 else
6423 ret = tracing_alloc_snapshot_instance(tr);
6424 if (ret < 0)
6425 break;
6426 local_irq_disable();
6427 /* Now, we're going to swap */
6428 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6429 update_max_tr(tr, current, smp_processor_id());
6430 else
6431 update_max_tr_single(tr, current, iter->cpu_file);
6432 local_irq_enable();
6433 break;
6434 default:
6435 if (tr->allocated_snapshot) {
6436 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6437 tracing_reset_online_cpus(&tr->max_buffer);
6438 else
6439 tracing_reset(&tr->max_buffer, iter->cpu_file);
6440 }
6441 break;
6442 }
6443
6444 if (ret >= 0) {
6445 *ppos += cnt;
6446 ret = cnt;
6447 }
6448 out:
6449 mutex_unlock(&trace_types_lock);
6450 return ret;
6451 }
6452
6453 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6454 {
6455 struct seq_file *m = file->private_data;
6456 int ret;
6457
6458 ret = tracing_release(inode, file);
6459
6460 if (file->f_mode & FMODE_READ)
6461 return ret;
6462
6463 /* If write only, the seq_file is just a stub */
6464 if (m)
6465 kfree(m->private);
6466 kfree(m);
6467
6468 return 0;
6469 }
6470
6471 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6472 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6473 size_t count, loff_t *ppos);
6474 static int tracing_buffers_release(struct inode *inode, struct file *file);
6475 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6476 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6477
6478 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6479 {
6480 struct ftrace_buffer_info *info;
6481 int ret;
6482
6483 ret = tracing_buffers_open(inode, filp);
6484 if (ret < 0)
6485 return ret;
6486
6487 info = filp->private_data;
6488
6489 if (info->iter.trace->use_max_tr) {
6490 tracing_buffers_release(inode, filp);
6491 return -EBUSY;
6492 }
6493
6494 info->iter.snapshot = true;
6495 info->iter.trace_buffer = &info->iter.tr->max_buffer;
6496
6497 return ret;
6498 }
6499
6500 #endif /* CONFIG_TRACER_SNAPSHOT */
6501
6502
6503 static const struct file_operations tracing_thresh_fops = {
6504 .open = tracing_open_generic,
6505 .read = tracing_thresh_read,
6506 .write = tracing_thresh_write,
6507 .llseek = generic_file_llseek,
6508 };
6509
6510 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6511 static const struct file_operations tracing_max_lat_fops = {
6512 .open = tracing_open_generic,
6513 .read = tracing_max_lat_read,
6514 .write = tracing_max_lat_write,
6515 .llseek = generic_file_llseek,
6516 };
6517 #endif
6518
6519 static const struct file_operations set_tracer_fops = {
6520 .open = tracing_open_generic,
6521 .read = tracing_set_trace_read,
6522 .write = tracing_set_trace_write,
6523 .llseek = generic_file_llseek,
6524 };
6525
6526 static const struct file_operations tracing_pipe_fops = {
6527 .open = tracing_open_pipe,
6528 .poll = tracing_poll_pipe,
6529 .read = tracing_read_pipe,
6530 .splice_read = tracing_splice_read_pipe,
6531 .release = tracing_release_pipe,
6532 .llseek = no_llseek,
6533 };
6534
6535 static const struct file_operations tracing_entries_fops = {
6536 .open = tracing_open_generic_tr,
6537 .read = tracing_entries_read,
6538 .write = tracing_entries_write,
6539 .llseek = generic_file_llseek,
6540 .release = tracing_release_generic_tr,
6541 };
6542
6543 static const struct file_operations tracing_total_entries_fops = {
6544 .open = tracing_open_generic_tr,
6545 .read = tracing_total_entries_read,
6546 .llseek = generic_file_llseek,
6547 .release = tracing_release_generic_tr,
6548 };
6549
6550 static const struct file_operations tracing_free_buffer_fops = {
6551 .open = tracing_open_generic_tr,
6552 .write = tracing_free_buffer_write,
6553 .release = tracing_free_buffer_release,
6554 };
6555
6556 static const struct file_operations tracing_mark_fops = {
6557 .open = tracing_open_generic_tr,
6558 .write = tracing_mark_write,
6559 .llseek = generic_file_llseek,
6560 .release = tracing_release_generic_tr,
6561 };
6562
6563 static const struct file_operations tracing_mark_raw_fops = {
6564 .open = tracing_open_generic_tr,
6565 .write = tracing_mark_raw_write,
6566 .llseek = generic_file_llseek,
6567 .release = tracing_release_generic_tr,
6568 };
6569
6570 static const struct file_operations trace_clock_fops = {
6571 .open = tracing_clock_open,
6572 .read = seq_read,
6573 .llseek = seq_lseek,
6574 .release = tracing_single_release_tr,
6575 .write = tracing_clock_write,
6576 };
6577
6578 #ifdef CONFIG_TRACER_SNAPSHOT
6579 static const struct file_operations snapshot_fops = {
6580 .open = tracing_snapshot_open,
6581 .read = seq_read,
6582 .write = tracing_snapshot_write,
6583 .llseek = tracing_lseek,
6584 .release = tracing_snapshot_release,
6585 };
6586
6587 static const struct file_operations snapshot_raw_fops = {
6588 .open = snapshot_raw_open,
6589 .read = tracing_buffers_read,
6590 .release = tracing_buffers_release,
6591 .splice_read = tracing_buffers_splice_read,
6592 .llseek = no_llseek,
6593 };
6594
6595 #endif /* CONFIG_TRACER_SNAPSHOT */
6596
6597 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6598 {
6599 struct trace_array *tr = inode->i_private;
6600 struct ftrace_buffer_info *info;
6601 int ret;
6602
6603 if (tracing_disabled)
6604 return -ENODEV;
6605
6606 if (trace_array_get(tr) < 0)
6607 return -ENODEV;
6608
6609 info = kzalloc(sizeof(*info), GFP_KERNEL);
6610 if (!info) {
6611 trace_array_put(tr);
6612 return -ENOMEM;
6613 }
6614
6615 mutex_lock(&trace_types_lock);
6616
6617 info->iter.tr = tr;
6618 info->iter.cpu_file = tracing_get_cpu(inode);
6619 info->iter.trace = tr->current_trace;
6620 info->iter.trace_buffer = &tr->trace_buffer;
6621 info->spare = NULL;
6622 /* Force reading ring buffer for first read */
6623 info->read = (unsigned int)-1;
6624
6625 filp->private_data = info;
6626
6627 tr->current_trace->ref++;
6628
6629 mutex_unlock(&trace_types_lock);
6630
6631 ret = nonseekable_open(inode, filp);
6632 if (ret < 0)
6633 trace_array_put(tr);
6634
6635 return ret;
6636 }
6637
6638 static unsigned int
6639 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6640 {
6641 struct ftrace_buffer_info *info = filp->private_data;
6642 struct trace_iterator *iter = &info->iter;
6643
6644 return trace_poll(iter, filp, poll_table);
6645 }
6646
6647 static ssize_t
6648 tracing_buffers_read(struct file *filp, char __user *ubuf,
6649 size_t count, loff_t *ppos)
6650 {
6651 struct ftrace_buffer_info *info = filp->private_data;
6652 struct trace_iterator *iter = &info->iter;
6653 ssize_t ret = 0;
6654 ssize_t size;
6655
6656 if (!count)
6657 return 0;
6658
6659 #ifdef CONFIG_TRACER_MAX_TRACE
6660 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6661 return -EBUSY;
6662 #endif
6663
6664 if (!info->spare) {
6665 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6666 iter->cpu_file);
6667 if (IS_ERR(info->spare)) {
6668 ret = PTR_ERR(info->spare);
6669 info->spare = NULL;
6670 } else {
6671 info->spare_cpu = iter->cpu_file;
6672 }
6673 }
6674 if (!info->spare)
6675 return ret;
6676
6677 /* Do we have previous read data to read? */
6678 if (info->read < PAGE_SIZE)
6679 goto read;
6680
6681 again:
6682 trace_access_lock(iter->cpu_file);
6683 ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6684 &info->spare,
6685 count,
6686 iter->cpu_file, 0);
6687 trace_access_unlock(iter->cpu_file);
6688
6689 if (ret < 0) {
6690 if (trace_empty(iter)) {
6691 if ((filp->f_flags & O_NONBLOCK))
6692 return -EAGAIN;
6693
6694 ret = wait_on_pipe(iter, false);
6695 if (ret)
6696 return ret;
6697
6698 goto again;
6699 }
6700 return 0;
6701 }
6702
6703 info->read = 0;
6704 read:
6705 size = PAGE_SIZE - info->read;
6706 if (size > count)
6707 size = count;
6708
6709 ret = copy_to_user(ubuf, info->spare + info->read, size);
6710 if (ret == size)
6711 return -EFAULT;
6712
6713 size -= ret;
6714
6715 *ppos += size;
6716 info->read += size;
6717
6718 return size;
6719 }
6720
6721 static int tracing_buffers_release(struct inode *inode, struct file *file)
6722 {
6723 struct ftrace_buffer_info *info = file->private_data;
6724 struct trace_iterator *iter = &info->iter;
6725
6726 mutex_lock(&trace_types_lock);
6727
6728 iter->tr->current_trace->ref--;
6729
6730 __trace_array_put(iter->tr);
6731
6732 if (info->spare)
6733 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6734 info->spare_cpu, info->spare);
6735 kfree(info);
6736
6737 mutex_unlock(&trace_types_lock);
6738
6739 return 0;
6740 }
6741
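/*
 * Reference-counted handle for a ring buffer page that has been handed
 * to a pipe by tracing_buffers_splice_read(). The page is only returned
 * to the ring buffer once the last pipe buffer referencing it has been
 * released.
 */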
6742 struct buffer_ref {
6743 struct ring_buffer *buffer;
6744 void *page;
6745 int cpu;
6746 refcount_t refcount;
6747 };
6748
6749 static void buffer_ref_release(struct buffer_ref *ref)
6750 {
6751 if (!refcount_dec_and_test(&ref->refcount))
6752 return;
6753 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6754 kfree(ref);
6755 }
6756
6757 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6758 struct pipe_buffer *buf)
6759 {
6760 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6761
6762 buffer_ref_release(ref);
6763 buf->private = 0;
6764 }
6765
6766 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6767 struct pipe_buffer *buf)
6768 {
6769 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6770
6771 if (refcount_read(&ref->refcount) > INT_MAX/2)
6772 return false;
6773
6774 refcount_inc(&ref->refcount);
6775 return true;
6776 }
6777
6778 /* Pipe buffer operations for a buffer. */
6779 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6780 .can_merge = 0,
6781 .confirm = generic_pipe_buf_confirm,
6782 .release = buffer_pipe_buf_release,
6783 .steal = generic_pipe_buf_nosteal,
6784 .get = buffer_pipe_buf_get,
6785 };
6786
6787 /*
6788 * Callback from splice_to_pipe(), if we need to release some pages
6789 * at the end of the spd in case we errored out while filling the pipe.
6790 */
6791 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6792 {
6793 struct buffer_ref *ref =
6794 (struct buffer_ref *)spd->partial[i].private;
6795
6796 buffer_ref_release(ref);
6797 spd->partial[i].private = 0;
6798 }
6799
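/*
 * Zero-copy splice for the per-cpu trace_pipe_raw files: whole ring
 * buffer pages are wrapped in buffer_refs and spliced into the pipe
 * without copying. Both *ppos and len must be page aligned (len is
 * rounded down when it is at least one page).
 */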
6800 static ssize_t
6801 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6802 struct pipe_inode_info *pipe, size_t len,
6803 unsigned int flags)
6804 {
6805 struct ftrace_buffer_info *info = file->private_data;
6806 struct trace_iterator *iter = &info->iter;
6807 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6808 struct page *pages_def[PIPE_DEF_BUFFERS];
6809 struct splice_pipe_desc spd = {
6810 .pages = pages_def,
6811 .partial = partial_def,
6812 .nr_pages_max = PIPE_DEF_BUFFERS,
6813 .ops = &buffer_pipe_buf_ops,
6814 .spd_release = buffer_spd_release,
6815 };
6816 struct buffer_ref *ref;
6817 int entries, i;
6818 ssize_t ret = 0;
6819
6820 #ifdef CONFIG_TRACER_MAX_TRACE
6821 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6822 return -EBUSY;
6823 #endif
6824
6825 if (*ppos & (PAGE_SIZE - 1))
6826 return -EINVAL;
6827
6828 if (len & (PAGE_SIZE - 1)) {
6829 if (len < PAGE_SIZE)
6830 return -EINVAL;
6831 len &= PAGE_MASK;
6832 }
6833
6834 if (splice_grow_spd(pipe, &spd))
6835 return -ENOMEM;
6836
6837 again:
6838 trace_access_lock(iter->cpu_file);
6839 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6840
6841 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6842 struct page *page;
6843 int r;
6844
6845 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6846 if (!ref) {
6847 ret = -ENOMEM;
6848 break;
6849 }
6850
6851 refcount_set(&ref->refcount, 1);
6852 ref->buffer = iter->trace_buffer->buffer;
6853 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6854 if (IS_ERR(ref->page)) {
6855 ret = PTR_ERR(ref->page);
6856 ref->page = NULL;
6857 kfree(ref);
6858 break;
6859 }
6860 ref->cpu = iter->cpu_file;
6861
6862 r = ring_buffer_read_page(ref->buffer, &ref->page,
6863 len, iter->cpu_file, 1);
6864 if (r < 0) {
6865 ring_buffer_free_read_page(ref->buffer, ref->cpu,
6866 ref->page);
6867 kfree(ref);
6868 break;
6869 }
6870
6871 page = virt_to_page(ref->page);
6872
6873 spd.pages[i] = page;
6874 spd.partial[i].len = PAGE_SIZE;
6875 spd.partial[i].offset = 0;
6876 spd.partial[i].private = (unsigned long)ref;
6877 spd.nr_pages++;
6878 *ppos += PAGE_SIZE;
6879
6880 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6881 }
6882
6883 trace_access_unlock(iter->cpu_file);
6884 spd.nr_pages = i;
6885
6886 /* did we read anything? */
6887 if (!spd.nr_pages) {
6888 if (ret)
6889 goto out;
6890
6891 ret = -EAGAIN;
6892 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6893 goto out;
6894
6895 ret = wait_on_pipe(iter, true);
6896 if (ret)
6897 goto out;
6898
6899 goto again;
6900 }
6901
6902 ret = splice_to_pipe(pipe, &spd);
6903 out:
6904 splice_shrink_spd(&spd);
6905
6906 return ret;
6907 }
6908
6909 static const struct file_operations tracing_buffers_fops = {
6910 .open = tracing_buffers_open,
6911 .read = tracing_buffers_read,
6912 .poll = tracing_buffers_poll,
6913 .release = tracing_buffers_release,
6914 .splice_read = tracing_buffers_splice_read,
6915 .llseek = no_llseek,
6916 };
6917
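/*
 * Read handler for the per-cpu "stats" files: reports entry, overrun,
 * commit-overrun, byte, dropped-event and read-event counts, plus the
 * oldest-event and current timestamps (as seconds.microseconds when the
 * trace clock counts in nanoseconds, raw clock values otherwise).
 */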
6918 static ssize_t
6919 tracing_stats_read(struct file *filp, char __user *ubuf,
6920 size_t count, loff_t *ppos)
6921 {
6922 struct inode *inode = file_inode(filp);
6923 struct trace_array *tr = inode->i_private;
6924 struct trace_buffer *trace_buf = &tr->trace_buffer;
6925 int cpu = tracing_get_cpu(inode);
6926 struct trace_seq *s;
6927 unsigned long cnt;
6928 unsigned long long t;
6929 unsigned long usec_rem;
6930
6931 s = kmalloc(sizeof(*s), GFP_KERNEL);
6932 if (!s)
6933 return -ENOMEM;
6934
6935 trace_seq_init(s);
6936
6937 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6938 trace_seq_printf(s, "entries: %ld\n", cnt);
6939
6940 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6941 trace_seq_printf(s, "overrun: %ld\n", cnt);
6942
6943 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6944 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6945
6946 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6947 trace_seq_printf(s, "bytes: %ld\n", cnt);
6948
6949 if (trace_clocks[tr->clock_id].in_ns) {
6950 /* local or global for trace_clock */
6951 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6952 usec_rem = do_div(t, USEC_PER_SEC);
6953 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6954 t, usec_rem);
6955
6956 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6957 usec_rem = do_div(t, USEC_PER_SEC);
6958 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6959 } else {
6960 /* counter or tsc mode for trace_clock */
6961 trace_seq_printf(s, "oldest event ts: %llu\n",
6962 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6963
6964 trace_seq_printf(s, "now ts: %llu\n",
6965 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6966 }
6967
6968 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6969 trace_seq_printf(s, "dropped events: %ld\n", cnt);
6970
6971 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6972 trace_seq_printf(s, "read events: %ld\n", cnt);
6973
6974 count = simple_read_from_buffer(ubuf, count, ppos,
6975 s->buffer, trace_seq_used(s));
6976
6977 kfree(s);
6978
6979 return count;
6980 }
6981
6982 static const struct file_operations tracing_stats_fops = {
6983 .open = tracing_open_generic_tr,
6984 .read = tracing_stats_read,
6985 .llseek = generic_file_llseek,
6986 .release = tracing_release_generic_tr,
6987 };
6988
6989 #ifdef CONFIG_DYNAMIC_FTRACE
6990
6991 static ssize_t
6992 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6993 size_t cnt, loff_t *ppos)
6994 {
6995 unsigned long *p = filp->private_data;
6996 char buf[64]; /* Not too big for a shallow stack */
6997 int r;
6998
6999 r = scnprintf(buf, 63, "%ld", *p);
7000 buf[r++] = '\n';
7001
7002 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7003 }
7004
7005 static const struct file_operations tracing_dyn_info_fops = {
7006 .open = tracing_open_generic,
7007 .read = tracing_read_dyn_info,
7008 .llseek = generic_file_llseek,
7009 };
7010 #endif /* CONFIG_DYNAMIC_FTRACE */
7011
7012 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7013 static void
7014 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7015 struct trace_array *tr, struct ftrace_probe_ops *ops,
7016 void *data)
7017 {
7018 tracing_snapshot_instance(tr);
7019 }
7020
7021 static void
7022 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7023 struct trace_array *tr, struct ftrace_probe_ops *ops,
7024 void *data)
7025 {
7026 struct ftrace_func_mapper *mapper = data;
7027 long *count = NULL;
7028
7029 if (mapper)
7030 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7031
7032 if (count) {
7033
7034 if (*count <= 0)
7035 return;
7036
7037 (*count)--;
7038 }
7039
7040 tracing_snapshot_instance(tr);
7041 }
7042
7043 static int
7044 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7045 struct ftrace_probe_ops *ops, void *data)
7046 {
7047 struct ftrace_func_mapper *mapper = data;
7048 long *count = NULL;
7049
7050 seq_printf(m, "%ps:", (void *)ip);
7051
7052 seq_puts(m, "snapshot");
7053
7054 if (mapper)
7055 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7056
7057 if (count)
7058 seq_printf(m, ":count=%ld\n", *count);
7059 else
7060 seq_puts(m, ":unlimited\n");
7061
7062 return 0;
7063 }
7064
7065 static int
7066 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7067 unsigned long ip, void *init_data, void **data)
7068 {
7069 struct ftrace_func_mapper *mapper = *data;
7070
7071 if (!mapper) {
7072 mapper = allocate_ftrace_func_mapper();
7073 if (!mapper)
7074 return -ENOMEM;
7075 *data = mapper;
7076 }
7077
7078 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7079 }
7080
7081 static void
7082 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7083 unsigned long ip, void *data)
7084 {
7085 struct ftrace_func_mapper *mapper = data;
7086
7087 if (!ip) {
7088 if (!mapper)
7089 return;
7090 free_ftrace_func_mapper(mapper, NULL);
7091 return;
7092 }
7093
7094 ftrace_func_mapper_remove_ip(mapper, ip);
7095 }
7096
7097 static struct ftrace_probe_ops snapshot_probe_ops = {
7098 .func = ftrace_snapshot,
7099 .print = ftrace_snapshot_print,
7100 };
7101
7102 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7103 .func = ftrace_count_snapshot,
7104 .print = ftrace_snapshot_print,
7105 .init = ftrace_snapshot_init,
7106 .free = ftrace_snapshot_free,
7107 };
7108
7109 static int
7110 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7111 char *glob, char *cmd, char *param, int enable)
7112 {
7113 struct ftrace_probe_ops *ops;
7114 void *count = (void *)-1;
7115 char *number;
7116 int ret;
7117
7118 if (!tr)
7119 return -ENODEV;
7120
7121 /* hash funcs only work with set_ftrace_filter */
7122 if (!enable)
7123 return -EINVAL;
7124
7125 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7126
7127 if (glob[0] == '!')
7128 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7129
7130 if (!param)
7131 goto out_reg;
7132
7133 number = strsep(&param, ":");
7134
7135 if (!strlen(number))
7136 goto out_reg;
7137
7138 /*
7139 * We use the callback data field (which is a pointer)
7140 * as our counter.
7141 */
7142 ret = kstrtoul(number, 0, (unsigned long *)&count);
7143 if (ret)
7144 return ret;
7145
7146 out_reg:
7147 ret = tracing_alloc_snapshot_instance(tr);
7148 if (ret < 0)
7149 goto out;
7150
7151 ret = register_ftrace_function_probe(glob, tr, ops, count);
7152
7153 out:
7154 return ret < 0 ? ret : 0;
7155 }
7156
7157 static struct ftrace_func_command ftrace_snapshot_cmd = {
7158 .name = "snapshot",
7159 .func = ftrace_trace_snapshot_callback,
7160 };
7161
7162 static __init int register_snapshot_cmd(void)
7163 {
7164 return register_ftrace_command(&ftrace_snapshot_cmd);
7165 }
7166 #else
7167 static inline __init int register_snapshot_cmd(void) { return 0; }
7168 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7169
7170 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7171 {
7172 if (WARN_ON(!tr->dir))
7173 return ERR_PTR(-ENODEV);
7174
7175 /* Top directory uses NULL as the parent */
7176 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7177 return NULL;
7178
7179 /* All sub buffers have a descriptor */
7180 return tr->dir;
7181 }
7182
7183 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7184 {
7185 struct dentry *d_tracer;
7186
7187 if (tr->percpu_dir)
7188 return tr->percpu_dir;
7189
7190 d_tracer = tracing_get_dentry(tr);
7191 if (IS_ERR(d_tracer))
7192 return NULL;
7193
7194 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7195
7196 WARN_ONCE(!tr->percpu_dir,
7197 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7198
7199 return tr->percpu_dir;
7200 }
7201
7202 static struct dentry *
7203 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7204 void *data, long cpu, const struct file_operations *fops)
7205 {
7206 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7207
7208 if (ret) /* See tracing_get_cpu() */
7209 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7210 return ret;
7211 }
7212
7213 static void
7214 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7215 {
7216 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7217 struct dentry *d_cpu;
7218 char cpu_dir[30]; /* 30 characters should be more than enough */
7219
7220 if (!d_percpu)
7221 return;
7222
7223 snprintf(cpu_dir, 30, "cpu%ld", cpu);
7224 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7225 if (!d_cpu) {
7226 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7227 return;
7228 }
7229
7230 /* per cpu trace_pipe */
7231 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7232 tr, cpu, &tracing_pipe_fops);
7233
7234 /* per cpu trace */
7235 trace_create_cpu_file("trace", 0644, d_cpu,
7236 tr, cpu, &tracing_fops);
7237
7238 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7239 tr, cpu, &tracing_buffers_fops);
7240
7241 trace_create_cpu_file("stats", 0444, d_cpu,
7242 tr, cpu, &tracing_stats_fops);
7243
7244 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7245 tr, cpu, &tracing_entries_fops);
7246
7247 #ifdef CONFIG_TRACER_SNAPSHOT
7248 trace_create_cpu_file("snapshot", 0644, d_cpu,
7249 tr, cpu, &snapshot_fops);
7250
7251 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7252 tr, cpu, &snapshot_raw_fops);
7253 #endif
7254 }
7255
7256 #ifdef CONFIG_FTRACE_SELFTEST
7257 /* Let selftest have access to static functions in this file */
7258 #include "trace_selftest.c"
7259 #endif
7260
7261 static ssize_t
7262 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7263 loff_t *ppos)
7264 {
7265 struct trace_option_dentry *topt = filp->private_data;
7266 char *buf;
7267
7268 if (topt->flags->val & topt->opt->bit)
7269 buf = "1\n";
7270 else
7271 buf = "0\n";
7272
7273 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7274 }
7275
7276 static ssize_t
7277 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7278 loff_t *ppos)
7279 {
7280 struct trace_option_dentry *topt = filp->private_data;
7281 unsigned long val;
7282 int ret;
7283
7284 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7285 if (ret)
7286 return ret;
7287
7288 if (val != 0 && val != 1)
7289 return -EINVAL;
7290
7291 if (!!(topt->flags->val & topt->opt->bit) != val) {
7292 mutex_lock(&trace_types_lock);
7293 ret = __set_tracer_option(topt->tr, topt->flags,
7294 topt->opt, !val);
7295 mutex_unlock(&trace_types_lock);
7296 if (ret)
7297 return ret;
7298 }
7299
7300 *ppos += cnt;
7301
7302 return cnt;
7303 }
7304
7305
7306 static const struct file_operations trace_options_fops = {
7307 .open = tracing_open_generic,
7308 .read = trace_options_read,
7309 .write = trace_options_write,
7310 .llseek = generic_file_llseek,
7311 };
7312
7313 /*
7314 * In order to pass in both the trace_array descriptor as well as the index
7315 * to the flag that the trace option file represents, the trace_array
7316 * has a character array of trace_flags_index[], which holds the index
7317 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7318 * The address of this character array is passed to the flag option file
7319 * read/write callbacks.
7320 *
7321 * In order to extract both the index and the trace_array descriptor,
7322 * get_tr_index() uses the following algorithm.
7323 *
7324 * idx = *ptr;
7325 *
7326 * Dereferencing the pointer yields the index itself, because each
7327 * index[] entry holds its own position (remember index[1] == 1).
7328 *
7329 * Then, to get the trace_array descriptor, subtracting that index
7330 * from the pointer gives the start of the index array itself.
7331 *
7332 * ptr - idx == &index[0]
7333 *
7334 * Then a simple container_of() from that pointer gets us to the
7335 * trace_array descriptor.
7336 */
7337 static void get_tr_index(void *data, struct trace_array **ptr,
7338 unsigned int *pindex)
7339 {
7340 *pindex = *(unsigned char *)data;
7341
7342 *ptr = container_of(data - *pindex, struct trace_array,
7343 trace_flags_index);
7344 }
7345
7346 static ssize_t
7347 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7348 loff_t *ppos)
7349 {
7350 void *tr_index = filp->private_data;
7351 struct trace_array *tr;
7352 unsigned int index;
7353 char *buf;
7354
7355 get_tr_index(tr_index, &tr, &index);
7356
7357 if (tr->trace_flags & (1 << index))
7358 buf = "1\n";
7359 else
7360 buf = "0\n";
7361
7362 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7363 }
7364
7365 static ssize_t
7366 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7367 loff_t *ppos)
7368 {
7369 void *tr_index = filp->private_data;
7370 struct trace_array *tr;
7371 unsigned int index;
7372 unsigned long val;
7373 int ret;
7374
7375 get_tr_index(tr_index, &tr, &index);
7376
7377 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7378 if (ret)
7379 return ret;
7380
7381 if (val != 0 && val != 1)
7382 return -EINVAL;
7383
7384 mutex_lock(&event_mutex);
7385 mutex_lock(&trace_types_lock);
7386 ret = set_tracer_flag(tr, 1 << index, val);
7387 mutex_unlock(&trace_types_lock);
7388 mutex_unlock(&event_mutex);
7389
7390 if (ret < 0)
7391 return ret;
7392
7393 *ppos += cnt;
7394
7395 return cnt;
7396 }
7397
7398 static const struct file_operations trace_options_core_fops = {
7399 .open = tracing_open_generic,
7400 .read = trace_options_core_read,
7401 .write = trace_options_core_write,
7402 .llseek = generic_file_llseek,
7403 };
7404
7405 struct dentry *trace_create_file(const char *name,
7406 umode_t mode,
7407 struct dentry *parent,
7408 void *data,
7409 const struct file_operations *fops)
7410 {
7411 struct dentry *ret;
7412
7413 ret = tracefs_create_file(name, mode, parent, data, fops);
7414 if (!ret)
7415 pr_warn("Could not create tracefs '%s' entry\n", name);
7416
7417 return ret;
7418 }
7419
7420
7421 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7422 {
7423 struct dentry *d_tracer;
7424
7425 if (tr->options)
7426 return tr->options;
7427
7428 d_tracer = tracing_get_dentry(tr);
7429 if (IS_ERR(d_tracer))
7430 return NULL;
7431
7432 tr->options = tracefs_create_dir("options", d_tracer);
7433 if (!tr->options) {
7434 pr_warn("Could not create tracefs directory 'options'\n");
7435 return NULL;
7436 }
7437
7438 return tr->options;
7439 }
7440
7441 static void
7442 create_trace_option_file(struct trace_array *tr,
7443 struct trace_option_dentry *topt,
7444 struct tracer_flags *flags,
7445 struct tracer_opt *opt)
7446 {
7447 struct dentry *t_options;
7448
7449 t_options = trace_options_init_dentry(tr);
7450 if (!t_options)
7451 return;
7452
7453 topt->flags = flags;
7454 topt->opt = opt;
7455 topt->tr = tr;
7456
7457 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7458 &trace_options_fops);
7459
7460 }
7461
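/*
 * Create an options/<flag> file for every flag the given tracer
 * declares, and remember them in tr->topts so they can be managed
 * alongside the instance. Instance buffers only get option files for
 * tracers they are actually allowed to run.
 */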
7462 static void
7463 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7464 {
7465 struct trace_option_dentry *topts;
7466 struct trace_options *tr_topts;
7467 struct tracer_flags *flags;
7468 struct tracer_opt *opts;
7469 int cnt;
7470 int i;
7471
7472 if (!tracer)
7473 return;
7474
7475 flags = tracer->flags;
7476
7477 if (!flags || !flags->opts)
7478 return;
7479
7480 /*
7481 * If this is an instance, only create flags for tracers
7482 * the instance may have.
7483 */
7484 if (!trace_ok_for_array(tracer, tr))
7485 return;
7486
7487 for (i = 0; i < tr->nr_topts; i++) {
7488 		/* Make sure there are no duplicate flags. */
7489 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7490 return;
7491 }
7492
7493 opts = flags->opts;
7494
7495 for (cnt = 0; opts[cnt].name; cnt++)
7496 ;
7497
7498 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7499 if (!topts)
7500 return;
7501
7502 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7503 GFP_KERNEL);
7504 if (!tr_topts) {
7505 kfree(topts);
7506 return;
7507 }
7508
7509 tr->topts = tr_topts;
7510 tr->topts[tr->nr_topts].tracer = tracer;
7511 tr->topts[tr->nr_topts].topts = topts;
7512 tr->nr_topts++;
7513
7514 for (cnt = 0; opts[cnt].name; cnt++) {
7515 create_trace_option_file(tr, &topts[cnt], flags,
7516 &opts[cnt]);
7517 WARN_ONCE(topts[cnt].entry == NULL,
7518 "Failed to create trace option: %s",
7519 opts[cnt].name);
7520 }
7521 }
7522
7523 static struct dentry *
7524 create_trace_option_core_file(struct trace_array *tr,
7525 const char *option, long index)
7526 {
7527 struct dentry *t_options;
7528
7529 t_options = trace_options_init_dentry(tr);
7530 if (!t_options)
7531 return NULL;
7532
7533 return trace_create_file(option, 0644, t_options,
7534 (void *)&tr->trace_flags_index[index],
7535 &trace_options_core_fops);
7536 }
7537
7538 static void create_trace_options_dir(struct trace_array *tr)
7539 {
7540 struct dentry *t_options;
7541 bool top_level = tr == &global_trace;
7542 int i;
7543
7544 t_options = trace_options_init_dentry(tr);
7545 if (!t_options)
7546 return;
7547
7548 for (i = 0; trace_options[i]; i++) {
7549 if (top_level ||
7550 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7551 create_trace_option_core_file(tr, trace_options[i], i);
7552 }
7553 }
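/*
 * The helpers above populate the per-instance "options" directory: one file
 * per core trace flag, each backed by trace_options_core_fops. Typical use
 * from userspace (the path assumes the usual tracefs mount point, and the
 * flag name is only an example taken from the trace_options[] table):
 *
 *	echo 1 > /sys/kernel/tracing/options/sym-addr
 *	cat /sys/kernel/tracing/options/sym-addr
 */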
7554
7555 static ssize_t
7556 rb_simple_read(struct file *filp, char __user *ubuf,
7557 size_t cnt, loff_t *ppos)
7558 {
7559 struct trace_array *tr = filp->private_data;
7560 char buf[64];
7561 int r;
7562
7563 r = tracer_tracing_is_on(tr);
7564 r = sprintf(buf, "%d\n", r);
7565
7566 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7567 }
7568
7569 static ssize_t
7570 rb_simple_write(struct file *filp, const char __user *ubuf,
7571 size_t cnt, loff_t *ppos)
7572 {
7573 struct trace_array *tr = filp->private_data;
7574 struct ring_buffer *buffer = tr->trace_buffer.buffer;
7575 unsigned long val;
7576 int ret;
7577
7578 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7579 if (ret)
7580 return ret;
7581
7582 if (buffer) {
7583 mutex_lock(&trace_types_lock);
7584 if (!!val == tracer_tracing_is_on(tr)) {
7585 val = 0; /* do nothing */
7586 } else if (val) {
7587 tracer_tracing_on(tr);
7588 if (tr->current_trace->start)
7589 tr->current_trace->start(tr);
7590 } else {
7591 tracer_tracing_off(tr);
7592 if (tr->current_trace->stop)
7593 tr->current_trace->stop(tr);
7594 }
7595 mutex_unlock(&trace_types_lock);
7596 }
7597
7598 (*ppos)++;
7599
7600 return cnt;
7601 }
7602
7603 static const struct file_operations rb_simple_fops = {
7604 .open = tracing_open_generic_tr,
7605 .read = rb_simple_read,
7606 .write = rb_simple_write,
7607 .release = tracing_release_generic_tr,
7608 .llseek = default_llseek,
7609 };
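/*
 * rb_simple_fops backs the per-instance "tracing_on" file created in
 * init_tracer_tracefs() below. A typical interaction from userspace
 * (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on		# pause recording
 *	echo 1 > /sys/kernel/tracing/tracing_on		# resume recording
 *	cat /sys/kernel/tracing/tracing_on		# read current state
 *
 * Writing the value the buffer already has is accepted and simply does
 * nothing, as handled in rb_simple_write() above.
 */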
7610
7611 struct dentry *trace_instance_dir;
7612
7613 static void
7614 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7615
7616 static int
7617 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7618 {
7619 enum ring_buffer_flags rb_flags;
7620
7621 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7622
7623 buf->tr = tr;
7624
7625 buf->buffer = ring_buffer_alloc(size, rb_flags);
7626 if (!buf->buffer)
7627 return -ENOMEM;
7628
7629 buf->data = alloc_percpu(struct trace_array_cpu);
7630 if (!buf->data) {
7631 ring_buffer_free(buf->buffer);
7632 buf->buffer = NULL;
7633 return -ENOMEM;
7634 }
7635
7636 /* Allocate the first page for all buffers */
7637 set_buffer_entries(&tr->trace_buffer,
7638 ring_buffer_size(tr->trace_buffer.buffer, 0));
7639
7640 return 0;
7641 }
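/*
 * Note: allocate_trace_buffer() pairs the ring buffer itself with the
 * per-CPU trace_array_cpu bookkeeping. RB_FL_OVERWRITE selects "flight
 * recorder" behaviour: once the buffer is full the oldest events are
 * overwritten, instead of new writes being dropped.
 */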
7642
7643 static int allocate_trace_buffers(struct trace_array *tr, int size)
7644 {
7645 int ret;
7646
7647 ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7648 if (ret)
7649 return ret;
7650
7651 #ifdef CONFIG_TRACER_MAX_TRACE
7652 ret = allocate_trace_buffer(tr, &tr->max_buffer,
7653 allocate_snapshot ? size : 1);
7654 if (WARN_ON(ret)) {
7655 ring_buffer_free(tr->trace_buffer.buffer);
7656 tr->trace_buffer.buffer = NULL;
7657 free_percpu(tr->trace_buffer.data);
7658 tr->trace_buffer.data = NULL;
7659 return -ENOMEM;
7660 }
7661 tr->allocated_snapshot = allocate_snapshot;
7662
7663 /*
7664 * Only the top level trace array gets its snapshot allocated
7665 * from the kernel command line.
7666 */
7667 allocate_snapshot = false;
7668 #endif
7669 return 0;
7670 }
7671
7672 static void free_trace_buffer(struct trace_buffer *buf)
7673 {
7674 if (buf->buffer) {
7675 ring_buffer_free(buf->buffer);
7676 buf->buffer = NULL;
7677 free_percpu(buf->data);
7678 buf->data = NULL;
7679 }
7680 }
7681
7682 static void free_trace_buffers(struct trace_array *tr)
7683 {
7684 if (!tr)
7685 return;
7686
7687 free_trace_buffer(&tr->trace_buffer);
7688
7689 #ifdef CONFIG_TRACER_MAX_TRACE
7690 free_trace_buffer(&tr->max_buffer);
7691 #endif
7692 }
7693
7694 static void init_trace_flags_index(struct trace_array *tr)
7695 {
7696 int i;
7697
7698 /* Used by the trace options files */
7699 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7700 tr->trace_flags_index[i] = i;
7701 }
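/*
 * The identity mapping above looks redundant, but it is the address of each
 * trace_flags_index[] slot that is handed to the per-flag option files (see
 * create_trace_option_core_file()); get_tr_index() later turns that pointer
 * back into the owning trace_array plus the flag bit index.
 */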
7702
7703 static void __update_tracer_options(struct trace_array *tr)
7704 {
7705 struct tracer *t;
7706
7707 for (t = trace_types; t; t = t->next)
7708 add_tracer_options(tr, t);
7709 }
7710
7711 static void update_tracer_options(struct trace_array *tr)
7712 {
7713 mutex_lock(&trace_types_lock);
7714 __update_tracer_options(tr);
7715 mutex_unlock(&trace_types_lock);
7716 }
7717
7718 static int instance_mkdir(const char *name)
7719 {
7720 struct trace_array *tr;
7721 int ret;
7722
7723 mutex_lock(&trace_types_lock);
7724
7725 ret = -EEXIST;
7726 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7727 if (tr->name && strcmp(tr->name, name) == 0)
7728 goto out_unlock;
7729 }
7730
7731 ret = -ENOMEM;
7732 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7733 if (!tr)
7734 goto out_unlock;
7735
7736 tr->name = kstrdup(name, GFP_KERNEL);
7737 if (!tr->name)
7738 goto out_free_tr;
7739
7740 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7741 goto out_free_tr;
7742
7743 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7744
7745 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7746
7747 raw_spin_lock_init(&tr->start_lock);
7748
7749 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7750
7751 tr->current_trace = &nop_trace;
7752
7753 INIT_LIST_HEAD(&tr->systems);
7754 INIT_LIST_HEAD(&tr->events);
7755
7756 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7757 goto out_free_tr;
7758
7759 tr->dir = tracefs_create_dir(name, trace_instance_dir);
7760 if (!tr->dir)
7761 goto out_free_tr;
7762
7763 ret = event_trace_add_tracer(tr->dir, tr);
7764 if (ret) {
7765 tracefs_remove_recursive(tr->dir);
7766 goto out_free_tr;
7767 }
7768
7769 ftrace_init_trace_array(tr);
7770
7771 init_tracer_tracefs(tr, tr->dir);
7772 init_trace_flags_index(tr);
7773 __update_tracer_options(tr);
7774
7775 list_add(&tr->list, &ftrace_trace_arrays);
7776
7777 mutex_unlock(&trace_types_lock);
7778
7779 return 0;
7780
7781 out_free_tr:
7782 free_trace_buffers(tr);
7783 free_cpumask_var(tr->tracing_cpumask);
7784 kfree(tr->name);
7785 kfree(tr);
7786
7787 out_unlock:
7788 mutex_unlock(&trace_types_lock);
7789
7790 return ret;
7791
7792 }
7793
7794 static int instance_rmdir(const char *name)
7795 {
7796 struct trace_array *tr;
7797 int found = 0;
7798 int ret;
7799 int i;
7800
7801 mutex_lock(&trace_types_lock);
7802
7803 ret = -ENODEV;
7804 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7805 if (tr->name && strcmp(tr->name, name) == 0) {
7806 found = 1;
7807 break;
7808 }
7809 }
7810 if (!found)
7811 goto out_unlock;
7812
7813 ret = -EBUSY;
7814 if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7815 goto out_unlock;
7816
7817 list_del(&tr->list);
7818
7819 /* Disable all the flags that were enabled coming in */
7820 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7821 if ((1 << i) & ZEROED_TRACE_FLAGS)
7822 set_tracer_flag(tr, 1 << i, 0);
7823 }
7824
7825 tracing_set_nop(tr);
7826 clear_ftrace_function_probes(tr);
7827 event_trace_del_tracer(tr);
7828 ftrace_clear_pids(tr);
7829 ftrace_destroy_function_files(tr);
7830 tracefs_remove_recursive(tr->dir);
7831 free_trace_buffers(tr);
7832
7833 for (i = 0; i < tr->nr_topts; i++) {
7834 kfree(tr->topts[i].topts);
7835 }
7836 kfree(tr->topts);
7837
7838 free_cpumask_var(tr->tracing_cpumask);
7839 kfree(tr->name);
7840 kfree(tr);
7841
7842 ret = 0;
7843
7844 out_unlock:
7845 mutex_unlock(&trace_types_lock);
7846
7847 return ret;
7848 }
7849
7850 static __init void create_trace_instances(struct dentry *d_tracer)
7851 {
7852 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7853 instance_mkdir,
7854 instance_rmdir);
7855 if (WARN_ON(!trace_instance_dir))
7856 return;
7857 }
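/*
 * With the "instances" directory registered above, instances are managed
 * entirely from userspace with mkdir/rmdir (paths assume the usual tracefs
 * mount point, and "foo" is just an example name):
 *
 *	mkdir /sys/kernel/tracing/instances/foo		-> instance_mkdir("foo")
 *	rmdir /sys/kernel/tracing/instances/foo		-> instance_rmdir("foo")
 */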
7858
7859 static void
7860 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7861 {
7862 int cpu;
7863
7864 trace_create_file("available_tracers", 0444, d_tracer,
7865 tr, &show_traces_fops);
7866
7867 trace_create_file("current_tracer", 0644, d_tracer,
7868 tr, &set_tracer_fops);
7869
7870 trace_create_file("tracing_cpumask", 0644, d_tracer,
7871 tr, &tracing_cpumask_fops);
7872
7873 trace_create_file("trace_options", 0644, d_tracer,
7874 tr, &tracing_iter_fops);
7875
7876 trace_create_file("trace", 0644, d_tracer,
7877 tr, &tracing_fops);
7878
7879 trace_create_file("trace_pipe", 0444, d_tracer,
7880 tr, &tracing_pipe_fops);
7881
7882 trace_create_file("buffer_size_kb", 0644, d_tracer,
7883 tr, &tracing_entries_fops);
7884
7885 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7886 tr, &tracing_total_entries_fops);
7887
7888 trace_create_file("free_buffer", 0200, d_tracer,
7889 tr, &tracing_free_buffer_fops);
7890
7891 trace_create_file("trace_marker", 0220, d_tracer,
7892 tr, &tracing_mark_fops);
7893
7894 trace_create_file("trace_marker_raw", 0220, d_tracer,
7895 tr, &tracing_mark_raw_fops);
7896
7897 trace_create_file("trace_clock", 0644, d_tracer, tr,
7898 &trace_clock_fops);
7899
7900 trace_create_file("tracing_on", 0644, d_tracer,
7901 tr, &rb_simple_fops);
7902
7903 create_trace_options_dir(tr);
7904
7905 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7906 trace_create_file("tracing_max_latency", 0644, d_tracer,
7907 &tr->max_latency, &tracing_max_lat_fops);
7908 #endif
7909
7910 if (ftrace_create_function_files(tr, d_tracer))
7911 WARN(1, "Could not allocate function filter files");
7912
7913 #ifdef CONFIG_TRACER_SNAPSHOT
7914 trace_create_file("snapshot", 0644, d_tracer,
7915 tr, &snapshot_fops);
7916 #endif
7917
7918 for_each_tracing_cpu(cpu)
7919 tracing_init_tracefs_percpu(tr, cpu);
7920
7921 ftrace_init_tracefs(tr, d_tracer);
7922 }
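/*
 * The calls above give every trace_array, the top level one and instances
 * alike, the familiar set of control files. A freshly created instance
 * therefore looks roughly like this under tracefs (sketch, not exhaustive):
 *
 *	instances/foo/available_tracers
 *	instances/foo/current_tracer
 *	instances/foo/trace
 *	instances/foo/trace_pipe
 *	instances/foo/tracing_on
 *	instances/foo/options/
 *	instances/foo/per_cpu/
 */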
7923
7924 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7925 {
7926 struct vfsmount *mnt;
7927 struct file_system_type *type;
7928
7929 /*
7930 * To maintain backward compatibility for tools that mount
7931 * debugfs to get to the tracing facility, tracefs is automatically
7932 * mounted to the debugfs/tracing directory.
7933 */
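	/*
	 * For example, on a typical setup (exact paths depend on where the
	 * admin mounts things):
	 *   /sys/kernel/tracing          - native tracefs mount
	 *   /sys/kernel/debug/tracing    - this automount, for older tools
	 */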
7934 type = get_fs_type("tracefs");
7935 if (!type)
7936 return NULL;
7937 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7938 put_filesystem(type);
7939 if (IS_ERR(mnt))
7940 return NULL;
7941 mntget(mnt);
7942
7943 return mnt;
7944 }
7945
7946 /**
7947 * tracing_init_dentry - initialize top level trace array
7948 *
7949 * This is called when creating files or directories in the tracing
7950 	 * directory. It is called via fs_initcall() from any of the boot-up
7951 	 * code and returns the dentry of the top level tracing directory.
7952 */
7953 struct dentry *tracing_init_dentry(void)
7954 {
7955 struct trace_array *tr = &global_trace;
7956
7957 /* The top level trace array uses NULL as parent */
7958 if (tr->dir)
7959 return NULL;
7960
7961 if (WARN_ON(!tracefs_initialized()) ||
7962 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7963 WARN_ON(!debugfs_initialized())))
7964 return ERR_PTR(-ENODEV);
7965
7966 /*
7967 * As there may still be users that expect the tracing
7968 * files to exist in debugfs/tracing, we must automount
7969 * the tracefs file system there, so older tools still
7970 	 * work with the newer kernel.
7971 */
7972 tr->dir = debugfs_create_automount("tracing", NULL,
7973 trace_automount, NULL);
7974 if (!tr->dir) {
7975 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7976 return ERR_PTR(-ENOMEM);
7977 }
7978
7979 return NULL;
7980 }
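/*
 * A caller of tracing_init_dentry() typically looks like the fs_initcall
 * users further down in this file, e.g. (sketch only; "my_file" and
 * "my_fops" are placeholders):
 *
 *	struct dentry *d_tracer = tracing_init_dentry();
 *
 *	if (IS_ERR(d_tracer))
 *		return 0;
 *	trace_create_file("my_file", 0444, d_tracer, NULL, &my_fops);
 *
 * Note that a NULL return means "use the top level tracefs directory",
 * not failure.
 */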
7981
7982 extern struct trace_eval_map *__start_ftrace_eval_maps[];
7983 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
7984
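/*
 * __start/__stop_ftrace_eval_maps are linker-generated symbols that bound
 * the section into which TRACE_DEFINE_ENUM() and friends emit their
 * trace_eval_map entries, so the pointer difference below is simply the
 * number of entries in that section.
 */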
7985 static void __init trace_eval_init(void)
7986 {
7987 int len;
7988
7989 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
7990 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
7991 }
7992
7993 #ifdef CONFIG_MODULES
7994 static void trace_module_add_evals(struct module *mod)
7995 {
7996 if (!mod->num_trace_evals)
7997 return;
7998
7999 /*
8000 	 * Modules with bad taint do not have events created; do
8001 	 * not bother with their eval maps either.
8002 */
8003 if (trace_module_has_bad_taint(mod))
8004 return;
8005
8006 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8007 }
8008
8009 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8010 static void trace_module_remove_evals(struct module *mod)
8011 {
8012 union trace_eval_map_item *map;
8013 union trace_eval_map_item **last = &trace_eval_maps;
8014
8015 if (!mod->num_trace_evals)
8016 return;
8017
8018 mutex_lock(&trace_eval_mutex);
8019
8020 map = trace_eval_maps;
8021
8022 while (map) {
8023 if (map->head.mod == mod)
8024 break;
8025 map = trace_eval_jmp_to_tail(map);
8026 last = &map->tail.next;
8027 map = map->tail.next;
8028 }
8029 if (!map)
8030 goto out;
8031
8032 *last = trace_eval_jmp_to_tail(map)->tail.next;
8033 kfree(map);
8034 out:
8035 mutex_unlock(&trace_eval_mutex);
8036 }
8037 #else
8038 static inline void trace_module_remove_evals(struct module *mod) { }
8039 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8040
8041 static int trace_module_notify(struct notifier_block *self,
8042 unsigned long val, void *data)
8043 {
8044 struct module *mod = data;
8045
8046 switch (val) {
8047 case MODULE_STATE_COMING:
8048 trace_module_add_evals(mod);
8049 break;
8050 case MODULE_STATE_GOING:
8051 trace_module_remove_evals(mod);
8052 break;
8053 }
8054
8055 return 0;
8056 }
8057
8058 static struct notifier_block trace_module_nb = {
8059 .notifier_call = trace_module_notify,
8060 .priority = 0,
8061 };
8062 #endif /* CONFIG_MODULES */
8063
8064 static __init int tracer_init_tracefs(void)
8065 {
8066 struct dentry *d_tracer;
8067
8068 trace_access_lock_init();
8069
8070 d_tracer = tracing_init_dentry();
8071 if (IS_ERR(d_tracer))
8072 return 0;
8073
8074 init_tracer_tracefs(&global_trace, d_tracer);
8075 ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8076
8077 trace_create_file("tracing_thresh", 0644, d_tracer,
8078 &global_trace, &tracing_thresh_fops);
8079
8080 trace_create_file("README", 0444, d_tracer,
8081 NULL, &tracing_readme_fops);
8082
8083 trace_create_file("saved_cmdlines", 0444, d_tracer,
8084 NULL, &tracing_saved_cmdlines_fops);
8085
8086 trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8087 NULL, &tracing_saved_cmdlines_size_fops);
8088
8089 trace_create_file("saved_tgids", 0444, d_tracer,
8090 NULL, &tracing_saved_tgids_fops);
8091
8092 trace_eval_init();
8093
8094 trace_create_eval_file(d_tracer);
8095
8096 #ifdef CONFIG_MODULES
8097 register_module_notifier(&trace_module_nb);
8098 #endif
8099
8100 #ifdef CONFIG_DYNAMIC_FTRACE
8101 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8102 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8103 #endif
8104
8105 create_trace_instances(d_tracer);
8106
8107 update_tracer_options(&global_trace);
8108
8109 return 0;
8110 }
8111
8112 static int trace_panic_handler(struct notifier_block *this,
8113 unsigned long event, void *unused)
8114 {
8115 if (ftrace_dump_on_oops)
8116 ftrace_dump(ftrace_dump_on_oops);
8117 return NOTIFY_OK;
8118 }
8119
8120 static struct notifier_block trace_panic_notifier = {
8121 .notifier_call = trace_panic_handler,
8122 .next = NULL,
8123 .priority = 150 /* priority: INT_MAX >= x >= 0 */
8124 };
8125
8126 static int trace_die_handler(struct notifier_block *self,
8127 unsigned long val,
8128 void *data)
8129 {
8130 switch (val) {
8131 case DIE_OOPS:
8132 if (ftrace_dump_on_oops)
8133 ftrace_dump(ftrace_dump_on_oops);
8134 break;
8135 default:
8136 break;
8137 }
8138 return NOTIFY_OK;
8139 }
8140
8141 static struct notifier_block trace_die_notifier = {
8142 .notifier_call = trace_die_handler,
8143 .priority = 200
8144 };
8145
8146 /*
8147  * printk is set to a max of 1024, we really don't need it that big.
8148 * Nothing should be printing 1000 characters anyway.
8149 */
8150 #define TRACE_MAX_PRINT 1000
8151
8152 /*
8153 * Define here KERN_TRACE so that we have one place to modify
8154 * it if we decide to change what log level the ftrace dump
8155 * should be at.
8156 */
8157 #define KERN_TRACE KERN_EMERG
8158
8159 void
8160 trace_printk_seq(struct trace_seq *s)
8161 {
8162 /* Probably should print a warning here. */
8163 if (s->seq.len >= TRACE_MAX_PRINT)
8164 s->seq.len = TRACE_MAX_PRINT;
8165
8166 /*
8167 * More paranoid code. Although the buffer size is set to
8168 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8169 * an extra layer of protection.
8170 */
8171 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8172 s->seq.len = s->seq.size - 1;
8173
8174 	/* should already be nul-terminated, but we are paranoid. */
8175 s->buffer[s->seq.len] = 0;
8176
8177 printk(KERN_TRACE "%s", s->buffer);
8178
8179 trace_seq_init(s);
8180 }
8181
8182 void trace_init_global_iter(struct trace_iterator *iter)
8183 {
8184 iter->tr = &global_trace;
8185 iter->trace = iter->tr->current_trace;
8186 iter->cpu_file = RING_BUFFER_ALL_CPUS;
8187 iter->trace_buffer = &global_trace.trace_buffer;
8188
8189 if (iter->trace && iter->trace->open)
8190 iter->trace->open(iter);
8191
8192 /* Annotate start of buffers if we had overruns */
8193 if (ring_buffer_overruns(iter->trace_buffer->buffer))
8194 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8195
8196 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8197 if (trace_clocks[iter->tr->clock_id].in_ns)
8198 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8199 }
8200
8201 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8202 {
8203 /* use static because iter can be a bit big for the stack */
8204 static struct trace_iterator iter;
8205 static atomic_t dump_running;
8206 struct trace_array *tr = &global_trace;
8207 unsigned int old_userobj;
8208 unsigned long flags;
8209 int cnt = 0, cpu;
8210
8211 /* Only allow one dump user at a time. */
8212 if (atomic_inc_return(&dump_running) != 1) {
8213 atomic_dec(&dump_running);
8214 return;
8215 }
8216
8217 /*
8218 * Always turn off tracing when we dump.
8219 * We don't need to show trace output of what happens
8220 * between multiple crashes.
8221 *
8222 * If the user does a sysrq-z, then they can re-enable
8223 * tracing with echo 1 > tracing_on.
8224 */
8225 tracing_off();
8226
8227 local_irq_save(flags);
8228 printk_nmi_direct_enter();
8229
8230 /* Simulate the iterator */
8231 trace_init_global_iter(&iter);
8232
8233 for_each_tracing_cpu(cpu) {
8234 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8235 }
8236
8237 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8238
8239 /* don't look at user memory in panic mode */
8240 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8241
8242 switch (oops_dump_mode) {
8243 case DUMP_ALL:
8244 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8245 break;
8246 case DUMP_ORIG:
8247 iter.cpu_file = raw_smp_processor_id();
8248 break;
8249 case DUMP_NONE:
8250 goto out_enable;
8251 default:
8252 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8253 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8254 }
8255
8256 printk(KERN_TRACE "Dumping ftrace buffer:\n");
8257
8258 /* Did function tracer already get disabled? */
8259 if (ftrace_is_dead()) {
8260 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8261 printk("# MAY BE MISSING FUNCTION EVENTS\n");
8262 }
8263
8264 /*
8265 	 * We need to stop all tracing on all CPUS to read
8266 	 * the next buffer. This is a bit expensive, but is
8267 	 * not done often. We fill what we can read,
8268 * and then release the locks again.
8269 */
8270
8271 while (!trace_empty(&iter)) {
8272
8273 if (!cnt)
8274 printk(KERN_TRACE "---------------------------------\n");
8275
8276 cnt++;
8277
8278 trace_iterator_reset(&iter);
8279 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8280
8281 if (trace_find_next_entry_inc(&iter) != NULL) {
8282 int ret;
8283
8284 ret = print_trace_line(&iter);
8285 if (ret != TRACE_TYPE_NO_CONSUME)
8286 trace_consume(&iter);
8287 }
8288 touch_nmi_watchdog();
8289
8290 trace_printk_seq(&iter.seq);
8291 }
8292
8293 if (!cnt)
8294 printk(KERN_TRACE " (ftrace buffer empty)\n");
8295 else
8296 printk(KERN_TRACE "---------------------------------\n");
8297
8298 out_enable:
8299 tr->trace_flags |= old_userobj;
8300
8301 for_each_tracing_cpu(cpu) {
8302 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8303 }
8304 atomic_dec(&dump_running);
8305 printk_nmi_direct_exit();
8306 local_irq_restore(flags);
8307 }
8308 EXPORT_SYMBOL_GPL(ftrace_dump);
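/*
 * Being exported, ftrace_dump() can also be called directly from other
 * kernel code as a last-resort debugging aid, e.g. (sketch):
 *
 *	if (WARN_ON(something_went_badly))
 *		ftrace_dump(DUMP_ALL);
 *
 * Keep in mind that it calls tracing_off(), so tracing stays disabled
 * until it is re-enabled by hand (echo 1 > tracing_on).
 */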
8309
8310 __init static int tracer_alloc_buffers(void)
8311 {
8312 int ring_buf_size;
8313 int ret = -ENOMEM;
8314
8315 /*
8316 	 * Make sure we don't accidentally add more trace options
8317 * than we have bits for.
8318 */
8319 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8320
8321 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8322 goto out;
8323
8324 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8325 goto out_free_buffer_mask;
8326
8327 /* Only allocate trace_printk buffers if a trace_printk exists */
8328 if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8329 /* Must be called before global_trace.buffer is allocated */
8330 trace_printk_init_buffers();
8331
8332 /* To save memory, keep the ring buffer size to its minimum */
8333 if (ring_buffer_expanded)
8334 ring_buf_size = trace_buf_size;
8335 else
8336 ring_buf_size = 1;
8337
8338 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8339 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8340
8341 raw_spin_lock_init(&global_trace.start_lock);
8342
8343 /*
8344 	 * The prepare callbacks allocate some memory for the ring buffer. We
8345 	 * don't free the buffer if the CPU goes down. If we were to free
8346 * the buffer, then the user would lose any trace that was in the
8347 * buffer. The memory will be removed once the "instance" is removed.
8348 */
8349 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8350 "trace/RB:preapre", trace_rb_cpu_prepare,
8351 NULL);
8352 if (ret < 0)
8353 goto out_free_cpumask;
8354 /* Used for event triggers */
8355 ret = -ENOMEM;
8356 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8357 if (!temp_buffer)
8358 goto out_rm_hp_state;
8359
8360 if (trace_create_savedcmd() < 0)
8361 goto out_free_temp_buffer;
8362
8363 /* TODO: make the number of buffers hot pluggable with CPUS */
8364 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8365 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8366 WARN_ON(1);
8367 goto out_free_savedcmd;
8368 }
8369
8370 if (global_trace.buffer_disabled)
8371 tracing_off();
8372
8373 if (trace_boot_clock) {
8374 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8375 if (ret < 0)
8376 pr_warn("Trace clock %s not defined, going back to default\n",
8377 trace_boot_clock);
8378 }
8379
8380 /*
8381 * register_tracer() might reference current_trace, so it
8382 * needs to be set before we register anything. This is
8383 * just a bootstrap of current_trace anyway.
8384 */
8385 global_trace.current_trace = &nop_trace;
8386
8387 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8388
8389 ftrace_init_global_array_ops(&global_trace);
8390
8391 init_trace_flags_index(&global_trace);
8392
8393 register_tracer(&nop_trace);
8394
8395 /* Function tracing may start here (via kernel command line) */
8396 init_function_trace();
8397
8398 /* All seems OK, enable tracing */
8399 tracing_disabled = 0;
8400
8401 atomic_notifier_chain_register(&panic_notifier_list,
8402 &trace_panic_notifier);
8403
8404 register_die_notifier(&trace_die_notifier);
8405
8406 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8407
8408 INIT_LIST_HEAD(&global_trace.systems);
8409 INIT_LIST_HEAD(&global_trace.events);
8410 list_add(&global_trace.list, &ftrace_trace_arrays);
8411
8412 apply_trace_boot_options();
8413
8414 register_snapshot_cmd();
8415
8416 return 0;
8417
8418 out_free_savedcmd:
8419 free_saved_cmdlines_buffer(savedcmd);
8420 out_free_temp_buffer:
8421 ring_buffer_free(temp_buffer);
8422 out_rm_hp_state:
8423 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8424 out_free_cpumask:
8425 free_cpumask_var(global_trace.tracing_cpumask);
8426 out_free_buffer_mask:
8427 free_cpumask_var(tracing_buffer_mask);
8428 out:
8429 return ret;
8430 }
8431
8432 void __init early_trace_init(void)
8433 {
8434 if (tracepoint_printk) {
8435 tracepoint_print_iter =
8436 kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8437 if (WARN_ON(!tracepoint_print_iter))
8438 tracepoint_printk = 0;
8439 else
8440 static_key_enable(&tracepoint_printk_key.key);
8441 }
8442 tracer_alloc_buffers();
8443 }
8444
8445 void __init trace_init(void)
8446 {
8447 trace_event_init();
8448 }
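/*
 * Both functions above are invoked from start_kernel(): early_trace_init()
 * runs early so that trace_printk() and the boot-time ring buffer are
 * available as soon as possible, while trace_init() follows once the event
 * infrastructure can be brought up.
 */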
8449
8450 __init static int clear_boot_tracer(void)
8451 {
8452 /*
8453 	 * The default boot-up tracer name is stored in an init section.
8454 	 * This function is called from a late initcall. If the boot
8455 	 * tracer was never found and registered, clear it out to prevent
8456 	 * a later registration from accessing the init memory that is
8457 	 * about to be freed.
8458 */
8459 if (!default_bootup_tracer)
8460 return 0;
8461
8462 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8463 default_bootup_tracer);
8464 default_bootup_tracer = NULL;
8465
8466 return 0;
8467 }
8468
8469 fs_initcall(tracer_init_tracefs);
8470 late_initcall_sync(clear_boot_tracer);
8471