1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/kmemleak.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 
50 #include "trace.h"
51 #include "trace_output.h"
52 
53 /*
54  * On boot up, the ring buffer is set to the minimum size, so that
55  * we do not waste memory on systems that are not using tracing.
56  */
57 bool ring_buffer_expanded;
58 
59 /*
60  * We need to change this state when a selftest is running.
61  * A selftest will lurk into the ring-buffer to count the
62  * entries inserted during the selftest although some concurrent
63  * insertions into the ring-buffer such as trace_printk could occur
64  * at the same time, giving false positive or negative results.
65  */
66 static bool __read_mostly tracing_selftest_running;
67 
68 /*
69  * If a tracer is running, we do not want to run SELFTEST.
70  */
71 bool __read_mostly tracing_selftest_disabled;
72 
73 /* Pipe tracepoints to printk */
74 struct trace_iterator *tracepoint_print_iter;
75 int tracepoint_printk;
76 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
77 
78 /* For tracers that don't implement custom flags */
79 static struct tracer_opt dummy_tracer_opt[] = {
80 	{ }
81 };
82 
83 static int
84 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
85 {
86 	return 0;
87 }
88 
89 /*
90  * To prevent the comm cache from being overwritten when no
91  * tracing is active, only save the comm when a trace event
92  * occurred.
93  */
94 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
95 
96 /*
97  * Kill all tracing for good (never come back).
98  * It is initialized to 1 but will turn to zero if the initialization
99  * of the tracer is successful. But that is the only place that sets
100  * this back to zero.
101  */
102 static int tracing_disabled = 1;
103 
104 cpumask_var_t __read_mostly	tracing_buffer_mask;
105 
106 /*
107  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
108  *
109  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
110  * is set, then ftrace_dump is called. This will output the contents
111  * of the ftrace buffers to the console.  This is very useful for
112  * capturing traces that lead to crashes and outputting them to a
113  * serial console.
114  *
115  * It is off by default, but you can enable it either by specifying
116  * "ftrace_dump_on_oops" on the kernel command line, or by setting
117  * /proc/sys/kernel/ftrace_dump_on_oops
118  * Set 1 if you want to dump buffers of all CPUs
119  * Set 2 if you want to dump the buffer of the CPU that triggered oops
120  */
121 
122 enum ftrace_dump_mode ftrace_dump_on_oops;
123 
124 /* When set, tracing will stop when a WARN*() is hit */
125 int __disable_trace_on_warning;
126 
127 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
128 /* Map of enums to their values, for "eval_map" file */
129 struct trace_eval_map_head {
130 	struct module			*mod;
131 	unsigned long			length;
132 };
133 
134 union trace_eval_map_item;
135 
136 struct trace_eval_map_tail {
137 	/*
138 	 * "end" is first and points to NULL as it must be different
139 	 * than "mod" or "eval_string"
140 	 */
141 	union trace_eval_map_item	*next;
142 	const char			*end;	/* points to NULL */
143 };
144 
145 static DEFINE_MUTEX(trace_eval_mutex);
146 
147 /*
148  * The trace_eval_maps are saved in an array with two extra elements,
149  * one at the beginning, and one at the end. The beginning item contains
150  * the count of the saved maps (head.length), and the module they
151  * belong to if not built in (head.mod). The ending item contains a
152  * pointer to the next array of saved eval_map items.
153  */
154 union trace_eval_map_item {
155 	struct trace_eval_map		map;
156 	struct trace_eval_map_head	head;
157 	struct trace_eval_map_tail	tail;
158 };
159 
160 static union trace_eval_map_item *trace_eval_maps;
161 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
162 
163 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
164 static void ftrace_trace_userstack(struct trace_array *tr,
165 				   struct ring_buffer *buffer,
166 				   unsigned long flags, int pc);
167 
168 #define MAX_TRACER_SIZE		100
169 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
170 static char *default_bootup_tracer;
171 
172 static bool allocate_snapshot;
173 
174 static int __init set_cmdline_ftrace(char *str)
175 {
176 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
177 	default_bootup_tracer = bootup_tracer_buf;
178 	/* We are using ftrace early, expand it */
179 	ring_buffer_expanded = true;
180 	return 1;
181 }
182 __setup("ftrace=", set_cmdline_ftrace);
183 
184 static int __init set_ftrace_dump_on_oops(char *str)
185 {
186 	if (*str++ != '=' || !*str) {
187 		ftrace_dump_on_oops = DUMP_ALL;
188 		return 1;
189 	}
190 
191 	if (!strcmp("orig_cpu", str)) {
192 		ftrace_dump_on_oops = DUMP_ORIG;
193 		return 1;
194 	}
195 
196 	return 0;
197 }
198 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
199 
200 static int __init stop_trace_on_warning(char *str)
201 {
202 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
203 		__disable_trace_on_warning = 1;
204 	return 1;
205 }
206 __setup("traceoff_on_warning", stop_trace_on_warning);
207 
208 static int __init boot_alloc_snapshot(char *str)
209 {
210 	allocate_snapshot = true;
211 	/* We also need the main ring buffer expanded */
212 	ring_buffer_expanded = true;
213 	return 1;
214 }
215 __setup("alloc_snapshot", boot_alloc_snapshot);
216 
217 
218 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
219 
220 static int __init set_trace_boot_options(char *str)
221 {
222 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
223 	return 1;
224 }
225 __setup("trace_options=", set_trace_boot_options);
226 
227 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
228 static char *trace_boot_clock __initdata;
229 
230 static int __init set_trace_boot_clock(char *str)
231 {
232 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
233 	trace_boot_clock = trace_boot_clock_buf;
234 	return 1;
235 }
236 __setup("trace_clock=", set_trace_boot_clock);
237 
238 static int __init set_tracepoint_printk(char *str)
239 {
240 	/* Ignore the "tp_printk_stop_on_boot" param */
241 	if (*str == '_')
242 		return 0;
243 
244 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
245 		tracepoint_printk = 1;
246 	return 1;
247 }
248 __setup("tp_printk", set_tracepoint_printk);
249 
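/*
 * Illustrative note (not part of the original file): the __setup()
 * handlers above consume kernel command-line parameters. A boot line
 * exercising them might look like the following (hypothetical values):
 *
 *   ftrace=function_graph ftrace_dump_on_oops=orig_cpu traceoff_on_warning
 *   alloc_snapshot trace_options=sym-addr trace_clock=global tp_printk
 */
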
250 unsigned long long ns2usecs(u64 nsec)
251 {
252 	nsec += 500;
253 	do_div(nsec, 1000);
254 	return nsec;
255 }
256 
257 /* trace_flags holds trace_options default values */
258 #define TRACE_DEFAULT_FLAGS						\
259 	(FUNCTION_DEFAULT_FLAGS |					\
260 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
261 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
262 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
263 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
264 
265 /* trace_options that are only supported by global_trace */
266 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
267 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
268 
269 /* trace_flags that are default zero for instances */
270 #define ZEROED_TRACE_FLAGS \
271 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
272 
273 /*
274  * The global_trace is the descriptor that holds the top-level tracing
275  * buffers for the live tracing.
276  */
277 static struct trace_array global_trace = {
278 	.trace_flags = TRACE_DEFAULT_FLAGS,
279 };
280 
281 LIST_HEAD(ftrace_trace_arrays);
282 
283 int trace_array_get(struct trace_array *this_tr)
284 {
285 	struct trace_array *tr;
286 	int ret = -ENODEV;
287 
288 	mutex_lock(&trace_types_lock);
289 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
290 		if (tr == this_tr) {
291 			tr->ref++;
292 			ret = 0;
293 			break;
294 		}
295 	}
296 	mutex_unlock(&trace_types_lock);
297 
298 	return ret;
299 }
300 
301 static void __trace_array_put(struct trace_array *this_tr)
302 {
303 	WARN_ON(!this_tr->ref);
304 	this_tr->ref--;
305 }
306 
307 void trace_array_put(struct trace_array *this_tr)
308 {
309 	mutex_lock(&trace_types_lock);
310 	__trace_array_put(this_tr);
311 	mutex_unlock(&trace_types_lock);
312 }
313 
314 int tracing_check_open_get_tr(struct trace_array *tr)
315 {
316 	int ret;
317 
318 	ret = security_locked_down(LOCKDOWN_TRACEFS);
319 	if (ret)
320 		return ret;
321 
322 	if (tracing_disabled)
323 		return -ENODEV;
324 
325 	if (tr && trace_array_get(tr) < 0)
326 		return -ENODEV;
327 
328 	return 0;
329 }
330 
331 int call_filter_check_discard(struct trace_event_call *call, void *rec,
332 			      struct ring_buffer *buffer,
333 			      struct ring_buffer_event *event)
334 {
335 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
336 	    !filter_match_preds(call->filter, rec)) {
337 		__trace_event_discard_commit(buffer, event);
338 		return 1;
339 	}
340 
341 	return 0;
342 }
343 
344 void trace_free_pid_list(struct trace_pid_list *pid_list)
345 {
346 	vfree(pid_list->pids);
347 	kfree(pid_list);
348 }
349 
350 /**
351  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
352  * @filtered_pids: The list of pids to check
353  * @search_pid: The PID to find in @filtered_pids
354  *
355  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
356  */
357 bool
358 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
359 {
360 	/*
361 	 * If pid_max changed after filtered_pids was created, we
362 	 * by default ignore all pids greater than the previous pid_max.
363 	 */
364 	if (search_pid >= filtered_pids->pid_max)
365 		return false;
366 
367 	return test_bit(search_pid, filtered_pids->pids);
368 }
369 
370 /**
371  * trace_ignore_this_task - should a task be ignored for tracing
372  * @filtered_pids: The list of pids to check
373  * @task: The task that should be ignored if not filtered
374  *
375  * Checks if @task should be traced or not from @filtered_pids.
376  * Returns true if @task should *NOT* be traced.
377  * Returns false if @task should be traced.
378  */
379 bool
380 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
381 {
382 	/*
383 	 * Return false, because if filtered_pids does not exist,
384 	 * all pids are good to trace.
385 	 */
386 	if (!filtered_pids)
387 		return false;
388 
389 	return !trace_find_filtered_pid(filtered_pids, task->pid);
390 }
391 
392 /**
393  * trace_filter_add_remove_task - Add or remove a task from a pid_list
394  * @pid_list: The list to modify
395  * @self: The current task for fork or NULL for exit
396  * @task: The task to add or remove
397  *
398  * If adding a task, if @self is defined, the task is only added if @self
399  * is also included in @pid_list. This happens on fork and tasks should
400  * only be added when the parent is listed. If @self is NULL, then the
401  * @task pid will be removed from the list, which would happen on exit
402  * of a task.
403  */
404 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
405 				  struct task_struct *self,
406 				  struct task_struct *task)
407 {
408 	if (!pid_list)
409 		return;
410 
411 	/* For forks, we only add if the forking task is listed */
412 	if (self) {
413 		if (!trace_find_filtered_pid(pid_list, self->pid))
414 			return;
415 	}
416 
417 	/* Sorry, but we don't support pid_max changing after setting */
418 	if (task->pid >= pid_list->pid_max)
419 		return;
420 
421 	/* "self" is set for forks, and NULL for exits */
422 	if (self)
423 		set_bit(task->pid, pid_list->pids);
424 	else
425 		clear_bit(task->pid, pid_list->pids);
426 }
427 
428 /**
429  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
430  * @pid_list: The pid list to show
431  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
432  * @pos: The position of the file
433  *
434  * This is used by the seq_file "next" operation to iterate the pids
435  * listed in a trace_pid_list structure.
436  *
437  * Returns the pid+1 as we want to display pid of zero, but NULL would
438  * stop the iteration.
439  */
440 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
441 {
442 	unsigned long pid = (unsigned long)v;
443 
444 	(*pos)++;
445 
446 	/* pid already is +1 of the actual previous bit */
447 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
448 
449 	/* Return pid + 1 to allow zero to be represented */
450 	if (pid < pid_list->pid_max)
451 		return (void *)(pid + 1);
452 
453 	return NULL;
454 }
455 
456 /**
457  * trace_pid_start - Used for seq_file to start reading pid lists
458  * @pid_list: The pid list to show
459  * @pos: The position of the file
460  *
461  * This is used by seq_file "start" operation to start the iteration
462  * of listing pids.
463  *
464  * Returns the pid+1 as we want to display pid of zero, but NULL would
465  * stop the iteration.
466  */
467 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
468 {
469 	unsigned long pid;
470 	loff_t l = 0;
471 
472 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
473 	if (pid >= pid_list->pid_max)
474 		return NULL;
475 
476 	/* Return pid + 1 so that zero can be the exit value */
477 	for (pid++; pid && l < *pos;
478 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
479 		;
480 	return (void *)pid;
481 }
482 
483 /**
484  * trace_pid_show - show the current pid in seq_file processing
485  * @m: The seq_file structure to write into
486  * @v: A void pointer of the pid (+1) value to display
487  *
488  * Can be directly used by seq_file operations to display the current
489  * pid value.
490  */
491 int trace_pid_show(struct seq_file *m, void *v)
492 {
493 	unsigned long pid = (unsigned long)v - 1;
494 
495 	seq_printf(m, "%lu\n", pid);
496 	return 0;
497 }
498 
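/*
 * Illustrative sketch (not part of the original file): the three
 * trace_pid_* helpers above are shaped for seq_file iteration. A
 * hypothetical user supplying its own pid list (example_pid_list is
 * assumed, not a real symbol) could wire them up like this:
 */
static struct trace_pid_list *example_pid_list;

static void *example_pids_seq_start(struct seq_file *m, loff_t *pos)
{
	return trace_pid_start(example_pid_list, pos);
}

static void *example_pids_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	return trace_pid_next(example_pid_list, v, pos);
}

static void example_pids_seq_stop(struct seq_file *m, void *v)
{
	/* nothing to release in this sketch */
}

static const struct seq_operations example_pids_seq_ops = {
	.start	= example_pids_seq_start,
	.next	= example_pids_seq_next,
	.stop	= example_pids_seq_stop,
	.show	= trace_pid_show,	/* prints one pid per line */
};
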
499 /* 128 should be much more than enough */
500 #define PID_BUF_SIZE		127
501 
502 int trace_pid_write(struct trace_pid_list *filtered_pids,
503 		    struct trace_pid_list **new_pid_list,
504 		    const char __user *ubuf, size_t cnt)
505 {
506 	struct trace_pid_list *pid_list;
507 	struct trace_parser parser;
508 	unsigned long val;
509 	int nr_pids = 0;
510 	ssize_t read = 0;
511 	ssize_t ret = 0;
512 	loff_t pos;
513 	pid_t pid;
514 
515 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
516 		return -ENOMEM;
517 
518 	/*
519 	 * Always recreate a new array. The write is an all or nothing
520 	 * operation. Always create a new array when adding new pids by
521 	 * the user. If the operation fails, then the current list is
522 	 * not modified.
523 	 */
524 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
525 	if (!pid_list) {
526 		trace_parser_put(&parser);
527 		return -ENOMEM;
528 	}
529 
530 	pid_list->pid_max = READ_ONCE(pid_max);
531 
532 	/* Only truncating will shrink pid_max */
533 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
534 		pid_list->pid_max = filtered_pids->pid_max;
535 
536 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
537 	if (!pid_list->pids) {
538 		trace_parser_put(&parser);
539 		kfree(pid_list);
540 		return -ENOMEM;
541 	}
542 
543 	if (filtered_pids) {
544 		/* copy the current bits to the new max */
545 		for_each_set_bit(pid, filtered_pids->pids,
546 				 filtered_pids->pid_max) {
547 			set_bit(pid, pid_list->pids);
548 			nr_pids++;
549 		}
550 	}
551 
552 	while (cnt > 0) {
553 
554 		pos = 0;
555 
556 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
557 		if (ret < 0 || !trace_parser_loaded(&parser))
558 			break;
559 
560 		read += ret;
561 		ubuf += ret;
562 		cnt -= ret;
563 
564 		ret = -EINVAL;
565 		if (kstrtoul(parser.buffer, 0, &val))
566 			break;
567 		if (val >= pid_list->pid_max)
568 			break;
569 
570 		pid = (pid_t)val;
571 
572 		set_bit(pid, pid_list->pids);
573 		nr_pids++;
574 
575 		trace_parser_clear(&parser);
576 		ret = 0;
577 	}
578 	trace_parser_put(&parser);
579 
580 	if (ret < 0) {
581 		trace_free_pid_list(pid_list);
582 		return ret;
583 	}
584 
585 	if (!nr_pids) {
586 		/* Cleared the list of pids */
587 		trace_free_pid_list(pid_list);
588 		read = ret;
589 		pid_list = NULL;
590 	}
591 
592 	*new_pid_list = pid_list;
593 
594 	return read;
595 }
596 
597 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
598 {
599 	u64 ts;
600 
601 	/* Early boot up does not have a buffer yet */
602 	if (!buf->buffer)
603 		return trace_clock_local();
604 
605 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
606 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
607 
608 	return ts;
609 }
610 
611 u64 ftrace_now(int cpu)
612 {
613 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
614 }
615 
616 /**
617  * tracing_is_enabled - Show if global_trace has been disabled
618  *
619  * Shows if the global trace has been enabled or not. It uses the
620  * mirror flag "buffer_disabled" to be used in fast paths such as for
621  * the irqsoff tracer. But it may be inaccurate due to races. If you
622  * need to know the accurate state, use tracing_is_on() which is a little
623  * slower, but accurate.
624  */
625 int tracing_is_enabled(void)
626 {
627 	/*
628 	 * For quick access (irqsoff uses this in fast path), just
629 	 * return the mirror variable of the state of the ring buffer.
630 	 * It's a little racy, but we don't really care.
631 	 */
632 	smp_rmb();
633 	return !global_trace.buffer_disabled;
634 }
635 
636 /*
637  * trace_buf_size is the size in bytes that is allocated
638  * for a buffer. Note, the number of bytes is always rounded
639  * to page size.
640  *
641  * This number is purposely set to a low number of 16384.
642  * If the dump on oops happens, it will be much appreciated
643  * to not have to wait for all that output. Anyway, this can be
644  * configured at both boot time and run time.
645  */
646 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
647 
648 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
649 
650 /* trace_types holds a link list of available tracers. */
651 static struct tracer		*trace_types __read_mostly;
652 
653 /*
654  * trace_types_lock is used to protect the trace_types list.
655  */
656 DEFINE_MUTEX(trace_types_lock);
657 
658 /*
659  * serialize the access of the ring buffer
660  *
661  * ring buffer serializes readers, but it is low-level protection.
662  * The validity of the events (which are returned by ring_buffer_peek() ..etc)
663  * is not protected by the ring buffer.
664  *
665  * The content of events may become garbage if we allow another process to
666  * consume these events concurrently:
667  *   A) the page of the consumed events may become a normal page
668  *      (not reader page) in the ring buffer, and this page will be rewritten
669  *      by the events producer.
670  *   B) The page of the consumed events may become a page for splice_read,
671  *      and this page will be returned to the system.
672  *
673  * These primitives allow multiple processes to access different CPU ring
674  * buffers concurrently.
675  *
676  * These primitives don't distinguish read-only and read-consume access.
677  * Multiple read-only accesses are also serialized.
678  */
679 
680 #ifdef CONFIG_SMP
681 static DECLARE_RWSEM(all_cpu_access_lock);
682 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
683 
684 static inline void trace_access_lock(int cpu)
685 {
686 	if (cpu == RING_BUFFER_ALL_CPUS) {
687 		/* gain it for accessing the whole ring buffer. */
688 		down_write(&all_cpu_access_lock);
689 	} else {
690 		/* gain it for accessing a cpu ring buffer. */
691 
692 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
693 		down_read(&all_cpu_access_lock);
694 
695 		/* Secondly block other access to this @cpu ring buffer. */
696 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
697 	}
698 }
699 
700 static inline void trace_access_unlock(int cpu)
701 {
702 	if (cpu == RING_BUFFER_ALL_CPUS) {
703 		up_write(&all_cpu_access_lock);
704 	} else {
705 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
706 		up_read(&all_cpu_access_lock);
707 	}
708 }
709 
710 static inline void trace_access_lock_init(void)
711 {
712 	int cpu;
713 
714 	for_each_possible_cpu(cpu)
715 		mutex_init(&per_cpu(cpu_access_lock, cpu));
716 }
717 
718 #else
719 
720 static DEFINE_MUTEX(access_lock);
721 
722 static inline void trace_access_lock(int cpu)
723 {
724 	(void)cpu;
725 	mutex_lock(&access_lock);
726 }
727 
728 static inline void trace_access_unlock(int cpu)
729 {
730 	(void)cpu;
731 	mutex_unlock(&access_lock);
732 }
733 
734 static inline void trace_access_lock_init(void)
735 {
736 }
737 
738 #endif
739 
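/*
 * Illustrative sketch (not part of the original file): a per-CPU
 * consumer is expected to bracket its buffer accesses with the helpers
 * above; passing RING_BUFFER_ALL_CPUS instead takes the access lock
 * exclusively. The body of example_consume_cpu() is only a placeholder.
 */
static void example_consume_cpu(int cpu)
{
	trace_access_lock(cpu);
	/* ... read or splice events from the @cpu ring buffer here ... */
	trace_access_unlock(cpu);
}
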
740 #ifdef CONFIG_STACKTRACE
741 static void __ftrace_trace_stack(struct ring_buffer *buffer,
742 				 unsigned long flags,
743 				 int skip, int pc, struct pt_regs *regs);
744 static inline void ftrace_trace_stack(struct trace_array *tr,
745 				      struct ring_buffer *buffer,
746 				      unsigned long flags,
747 				      int skip, int pc, struct pt_regs *regs);
748 
749 #else
750 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
751 					unsigned long flags,
752 					int skip, int pc, struct pt_regs *regs)
753 {
754 }
755 static inline void ftrace_trace_stack(struct trace_array *tr,
756 				      struct ring_buffer *buffer,
757 				      unsigned long flags,
758 				      int skip, int pc, struct pt_regs *regs)
759 {
760 }
761 
762 #endif
763 
764 static __always_inline void
765 trace_event_setup(struct ring_buffer_event *event,
766 		  int type, unsigned long flags, int pc)
767 {
768 	struct trace_entry *ent = ring_buffer_event_data(event);
769 
770 	tracing_generic_entry_update(ent, type, flags, pc);
771 }
772 
773 static __always_inline struct ring_buffer_event *
774 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
775 			  int type,
776 			  unsigned long len,
777 			  unsigned long flags, int pc)
778 {
779 	struct ring_buffer_event *event;
780 
781 	event = ring_buffer_lock_reserve(buffer, len);
782 	if (event != NULL)
783 		trace_event_setup(event, type, flags, pc);
784 
785 	return event;
786 }
787 
788 void tracer_tracing_on(struct trace_array *tr)
789 {
790 	if (tr->trace_buffer.buffer)
791 		ring_buffer_record_on(tr->trace_buffer.buffer);
792 	/*
793 	 * This flag is looked at when buffers haven't been allocated
794 	 * yet, or by some tracers (like irqsoff), that just want to
795 	 * know if the ring buffer has been disabled, but it can handle
796 	 * races of where it gets disabled but we still do a record.
797 	 * As the check is in the fast path of the tracers, it is more
798 	 * important to be fast than accurate.
799 	 */
800 	tr->buffer_disabled = 0;
801 	/* Make the flag seen by readers */
802 	smp_wmb();
803 }
804 
805 /**
806  * tracing_on - enable tracing buffers
807  *
808  * This function enables tracing buffers that may have been
809  * disabled with tracing_off.
810  */
811 void tracing_on(void)
812 {
813 	tracer_tracing_on(&global_trace);
814 }
815 EXPORT_SYMBOL_GPL(tracing_on);
816 
817 
818 static __always_inline void
819 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
820 {
821 	__this_cpu_write(trace_taskinfo_save, true);
822 
823 	/* If this is the temp buffer, we need to commit fully */
824 	if (this_cpu_read(trace_buffered_event) == event) {
825 		/* Length is in event->array[0] */
826 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
827 		/* Release the temp buffer */
828 		this_cpu_dec(trace_buffered_event_cnt);
829 	} else
830 		ring_buffer_unlock_commit(buffer, event);
831 }
832 
833 /**
834  * __trace_puts - write a constant string into the trace buffer.
835  * @ip:	   The address of the caller
836  * @str:   The constant string to write
837  * @size:  The size of the string.
838  */
839 int __trace_puts(unsigned long ip, const char *str, int size)
840 {
841 	struct ring_buffer_event *event;
842 	struct ring_buffer *buffer;
843 	struct print_entry *entry;
844 	unsigned long irq_flags;
845 	int alloc;
846 	int pc;
847 
848 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
849 		return 0;
850 
851 	pc = preempt_count();
852 
853 	if (unlikely(tracing_selftest_running || tracing_disabled))
854 		return 0;
855 
856 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
857 
858 	local_save_flags(irq_flags);
859 	buffer = global_trace.trace_buffer.buffer;
860 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
861 					    irq_flags, pc);
862 	if (!event)
863 		return 0;
864 
865 	entry = ring_buffer_event_data(event);
866 	entry->ip = ip;
867 
868 	memcpy(&entry->buf, str, size);
869 
870 	/* Add a newline if necessary */
871 	if (entry->buf[size - 1] != '\n') {
872 		entry->buf[size] = '\n';
873 		entry->buf[size + 1] = '\0';
874 	} else
875 		entry->buf[size] = '\0';
876 
877 	__buffer_unlock_commit(buffer, event);
878 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
879 
880 	return size;
881 }
882 EXPORT_SYMBOL_GPL(__trace_puts);
883 
884 /**
885  * __trace_bputs - write the pointer to a constant string into trace buffer
886  * @ip:	   The address of the caller
887  * @str:   The constant string to write to the buffer to
888  */
889 int __trace_bputs(unsigned long ip, const char *str)
890 {
891 	struct ring_buffer_event *event;
892 	struct ring_buffer *buffer;
893 	struct bputs_entry *entry;
894 	unsigned long irq_flags;
895 	int size = sizeof(struct bputs_entry);
896 	int pc;
897 
898 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
899 		return 0;
900 
901 	pc = preempt_count();
902 
903 	if (unlikely(tracing_selftest_running || tracing_disabled))
904 		return 0;
905 
906 	local_save_flags(irq_flags);
907 	buffer = global_trace.trace_buffer.buffer;
908 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
909 					    irq_flags, pc);
910 	if (!event)
911 		return 0;
912 
913 	entry = ring_buffer_event_data(event);
914 	entry->ip			= ip;
915 	entry->str			= str;
916 
917 	__buffer_unlock_commit(buffer, event);
918 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
919 
920 	return 1;
921 }
922 EXPORT_SYMBOL_GPL(__trace_bputs);
923 
924 #ifdef CONFIG_TRACER_SNAPSHOT
925 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
926 {
927 	struct tracer *tracer = tr->current_trace;
928 	unsigned long flags;
929 
930 	if (in_nmi()) {
931 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
932 		internal_trace_puts("*** snapshot is being ignored        ***\n");
933 		return;
934 	}
935 
936 	if (!tr->allocated_snapshot) {
937 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
938 		internal_trace_puts("*** stopping trace here!   ***\n");
939 		tracing_off();
940 		return;
941 	}
942 
943 	/* Note, snapshot can not be used when the tracer uses it */
944 	if (tracer->use_max_tr) {
945 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
946 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
947 		return;
948 	}
949 
950 	local_irq_save(flags);
951 	update_max_tr(tr, current, smp_processor_id(), cond_data);
952 	local_irq_restore(flags);
953 }
954 
955 void tracing_snapshot_instance(struct trace_array *tr)
956 {
957 	tracing_snapshot_instance_cond(tr, NULL);
958 }
959 
960 /**
961  * tracing_snapshot - take a snapshot of the current buffer.
962  *
963  * This causes a swap between the snapshot buffer and the current live
964  * tracing buffer. You can use this to take snapshots of the live
965  * trace when some condition is triggered, but continue to trace.
966  *
967  * Note, make sure to allocate the snapshot with either
968  * a tracing_snapshot_alloc(), or by doing it manually
969  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
970  *
971  * If the snapshot buffer is not allocated, it will stop tracing.
972  * Basically making a permanent snapshot.
973  */
974 void tracing_snapshot(void)
975 {
976 	struct trace_array *tr = &global_trace;
977 
978 	tracing_snapshot_instance(tr);
979 }
980 EXPORT_SYMBOL_GPL(tracing_snapshot);
981 
982 /**
983  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
984  * @tr:		The tracing instance to snapshot
985  * @cond_data:	The data to be tested conditionally, and possibly saved
986  *
987  * This is the same as tracing_snapshot() except that the snapshot is
988  * conditional - the snapshot will only happen if the
989  * cond_snapshot.update() implementation receiving the cond_data
990  * returns true, which means that the trace array's cond_snapshot
991  * update() operation used the cond_data to determine whether the
992  * snapshot should be taken, and if it was, presumably saved it along
993  * with the snapshot.
994  */
995 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
996 {
997 	tracing_snapshot_instance_cond(tr, cond_data);
998 }
999 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1000 
1001 /**
1002  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1003  * @tr:		The tracing instance
1004  *
1005  * When the user enables a conditional snapshot using
1006  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1007  * with the snapshot.  This accessor is used to retrieve it.
1008  *
1009  * Should not be called from cond_snapshot.update(), since it takes
1010  * the tr->max_lock lock, which the code calling
1011  * cond_snapshot.update() has already done.
1012  *
1013  * Returns the cond_data associated with the trace array's snapshot.
1014  */
1015 void *tracing_cond_snapshot_data(struct trace_array *tr)
1016 {
1017 	void *cond_data = NULL;
1018 
1019 	local_irq_disable();
1020 	arch_spin_lock(&tr->max_lock);
1021 
1022 	if (tr->cond_snapshot)
1023 		cond_data = tr->cond_snapshot->cond_data;
1024 
1025 	arch_spin_unlock(&tr->max_lock);
1026 	local_irq_enable();
1027 
1028 	return cond_data;
1029 }
1030 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1031 
1032 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1033 					struct trace_buffer *size_buf, int cpu_id);
1034 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1035 
1036 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1037 {
1038 	int ret;
1039 
1040 	if (!tr->allocated_snapshot) {
1041 
1042 		/* allocate spare buffer */
1043 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1044 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1045 		if (ret < 0)
1046 			return ret;
1047 
1048 		tr->allocated_snapshot = true;
1049 	}
1050 
1051 	return 0;
1052 }
1053 
1054 static void free_snapshot(struct trace_array *tr)
1055 {
1056 	/*
1057 	 * We don't free the ring buffer; instead, we resize it because
1058 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1059 	 * we want to preserve it.
1060 	 */
1061 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1062 	set_buffer_entries(&tr->max_buffer, 1);
1063 	tracing_reset_online_cpus(&tr->max_buffer);
1064 	tr->allocated_snapshot = false;
1065 }
1066 
1067 /**
1068  * tracing_alloc_snapshot - allocate snapshot buffer.
1069  *
1070  * This only allocates the snapshot buffer if it isn't already
1071  * allocated - it doesn't also take a snapshot.
1072  *
1073  * This is meant to be used in cases where the snapshot buffer needs
1074  * to be set up for events that can't sleep but need to be able to
1075  * trigger a snapshot.
1076  */
1077 int tracing_alloc_snapshot(void)
1078 {
1079 	struct trace_array *tr = &global_trace;
1080 	int ret;
1081 
1082 	ret = tracing_alloc_snapshot_instance(tr);
1083 	WARN_ON(ret < 0);
1084 
1085 	return ret;
1086 }
1087 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1088 
1089 /**
1090  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1091  *
1092  * This is similar to tracing_snapshot(), but it will allocate the
1093  * snapshot buffer if it isn't already allocated. Use this only
1094  * where it is safe to sleep, as the allocation may sleep.
1095  *
1096  * This causes a swap between the snapshot buffer and the current live
1097  * tracing buffer. You can use this to take snapshots of the live
1098  * trace when some condition is triggered, but continue to trace.
1099  */
1100 void tracing_snapshot_alloc(void)
1101 {
1102 	int ret;
1103 
1104 	ret = tracing_alloc_snapshot();
1105 	if (ret < 0)
1106 		return;
1107 
1108 	tracing_snapshot();
1109 }
1110 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1111 
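/*
 * Illustrative sketch (not part of the original file): a caller that
 * can sleep may pair the exported helpers above, pre-allocating the
 * spare buffer once and then taking snapshots later from places where
 * sleeping is not allowed. The function name and @hit flag are assumed.
 */
static void example_snapshot_on_condition(bool hit)
{
	if (tracing_alloc_snapshot() < 0)	/* may sleep; allocate once */
		return;

	if (hit)
		tracing_snapshot();		/* does not allocate or sleep */
}
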
1112 /**
1113  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1114  * @tr:		The tracing instance
1115  * @cond_data:	User data to associate with the snapshot
1116  * @update:	Implementation of the cond_snapshot update function
1117  *
1118  * Check whether the conditional snapshot for the given instance has
1119  * already been enabled, or if the current tracer is already using a
1120  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1121  * save the cond_data and update function inside.
1122  *
1123  * Returns 0 if successful, error otherwise.
1124  */
1125 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1126 				 cond_update_fn_t update)
1127 {
1128 	struct cond_snapshot *cond_snapshot;
1129 	int ret = 0;
1130 
1131 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1132 	if (!cond_snapshot)
1133 		return -ENOMEM;
1134 
1135 	cond_snapshot->cond_data = cond_data;
1136 	cond_snapshot->update = update;
1137 
1138 	mutex_lock(&trace_types_lock);
1139 
1140 	ret = tracing_alloc_snapshot_instance(tr);
1141 	if (ret)
1142 		goto fail_unlock;
1143 
1144 	if (tr->current_trace->use_max_tr) {
1145 		ret = -EBUSY;
1146 		goto fail_unlock;
1147 	}
1148 
1149 	/*
1150 	 * The cond_snapshot can only change to NULL without the
1151 	 * trace_types_lock. We don't care if we race with it going
1152 	 * to NULL, but we want to make sure that it's not set to
1153 	 * something other than NULL when we get here, which we can
1154 	 * do safely with only holding the trace_types_lock and not
1155 	 * having to take the max_lock.
1156 	 */
1157 	if (tr->cond_snapshot) {
1158 		ret = -EBUSY;
1159 		goto fail_unlock;
1160 	}
1161 
1162 	local_irq_disable();
1163 	arch_spin_lock(&tr->max_lock);
1164 	tr->cond_snapshot = cond_snapshot;
1165 	arch_spin_unlock(&tr->max_lock);
1166 	local_irq_enable();
1167 
1168 	mutex_unlock(&trace_types_lock);
1169 
1170 	return ret;
1171 
1172  fail_unlock:
1173 	mutex_unlock(&trace_types_lock);
1174 	kfree(cond_snapshot);
1175 	return ret;
1176 }
1177 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1178 
1179 /**
1180  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1181  * @tr:		The tracing instance
1182  *
1183  * Check whether the conditional snapshot for the given instance is
1184  * enabled; if so, free the cond_snapshot associated with it,
1185  * otherwise return -EINVAL.
1186  *
1187  * Returns 0 if successful, error otherwise.
1188  */
1189 int tracing_snapshot_cond_disable(struct trace_array *tr)
1190 {
1191 	int ret = 0;
1192 
1193 	local_irq_disable();
1194 	arch_spin_lock(&tr->max_lock);
1195 
1196 	if (!tr->cond_snapshot)
1197 		ret = -EINVAL;
1198 	else {
1199 		kfree(tr->cond_snapshot);
1200 		tr->cond_snapshot = NULL;
1201 	}
1202 
1203 	arch_spin_unlock(&tr->max_lock);
1204 	local_irq_enable();
1205 
1206 	return ret;
1207 }
1208 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1209 #else
1210 void tracing_snapshot(void)
1211 {
1212 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1213 }
1214 EXPORT_SYMBOL_GPL(tracing_snapshot);
1215 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1216 {
1217 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1218 }
1219 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1220 int tracing_alloc_snapshot(void)
1221 {
1222 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1223 	return -ENODEV;
1224 }
1225 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1226 void tracing_snapshot_alloc(void)
1227 {
1228 	/* Give warning */
1229 	tracing_snapshot();
1230 }
1231 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1232 void *tracing_cond_snapshot_data(struct trace_array *tr)
1233 {
1234 	return NULL;
1235 }
1236 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1237 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1238 {
1239 	return -ENODEV;
1240 }
1241 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1242 int tracing_snapshot_cond_disable(struct trace_array *tr)
1243 {
1244 	return false;
1245 }
1246 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1247 #endif /* CONFIG_TRACER_SNAPSHOT */
1248 
1249 void tracer_tracing_off(struct trace_array *tr)
1250 {
1251 	if (tr->trace_buffer.buffer)
1252 		ring_buffer_record_off(tr->trace_buffer.buffer);
1253 	/*
1254 	 * This flag is looked at when buffers haven't been allocated
1255 	 * yet, or by some tracers (like irqsoff), that just want to
1256 	 * know if the ring buffer has been disabled, but it can handle
1257 	 * races of where it gets disabled but we still do a record.
1258 	 * As the check is in the fast path of the tracers, it is more
1259 	 * important to be fast than accurate.
1260 	 */
1261 	tr->buffer_disabled = 1;
1262 	/* Make the flag seen by readers */
1263 	smp_wmb();
1264 }
1265 
1266 /**
1267  * tracing_off - turn off tracing buffers
1268  *
1269  * This function stops the tracing buffers from recording data.
1270  * It does not disable any overhead the tracers themselves may
1271  * be causing. This function simply causes all recording to
1272  * the ring buffers to fail.
1273  */
1274 void tracing_off(void)
1275 {
1276 	tracer_tracing_off(&global_trace);
1277 }
1278 EXPORT_SYMBOL_GPL(tracing_off);
1279 
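/*
 * Illustrative sketch (not part of the original file): tracing_on() and
 * tracing_off() are exported so kernel code can keep only the events
 * leading up to a problem in the ring buffer. The function name below
 * is hypothetical.
 */
static void example_capture_suspect_path(void)
{
	tracing_on();
	/* ... run the code path being debugged ... */
	tracing_off();		/* freeze the buffer for later inspection */
}
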
1280 void disable_trace_on_warning(void)
1281 {
1282 	if (__disable_trace_on_warning)
1283 		tracing_off();
1284 }
1285 
1286 /**
1287  * tracer_tracing_is_on - show real state of ring buffer enabled
1288  * @tr : the trace array to know if ring buffer is enabled
1289  *
1290  * Shows real state of the ring buffer if it is enabled or not.
1291  */
1292 bool tracer_tracing_is_on(struct trace_array *tr)
1293 {
1294 	if (tr->trace_buffer.buffer)
1295 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1296 	return !tr->buffer_disabled;
1297 }
1298 
1299 /**
1300  * tracing_is_on - show state of ring buffers enabled
1301  */
1302 int tracing_is_on(void)
1303 {
1304 	return tracer_tracing_is_on(&global_trace);
1305 }
1306 EXPORT_SYMBOL_GPL(tracing_is_on);
1307 
1308 static int __init set_buf_size(char *str)
1309 {
1310 	unsigned long buf_size;
1311 
1312 	if (!str)
1313 		return 0;
1314 	buf_size = memparse(str, &str);
1315 	/*
1316 	 * nr_entries can not be zero and the startup
1317 	 * tests require some buffer space. Therefore
1318 	 * ensure we have at least 4096 bytes of buffer.
1319 	 */
1320 	trace_buf_size = max(4096UL, buf_size);
1321 	return 1;
1322 }
1323 __setup("trace_buf_size=", set_buf_size);
1324 
1325 static int __init set_tracing_thresh(char *str)
1326 {
1327 	unsigned long threshold;
1328 	int ret;
1329 
1330 	if (!str)
1331 		return 0;
1332 	ret = kstrtoul(str, 0, &threshold);
1333 	if (ret < 0)
1334 		return 0;
1335 	tracing_thresh = threshold * 1000;
1336 	return 1;
1337 }
1338 __setup("tracing_thresh=", set_tracing_thresh);
1339 
1340 unsigned long nsecs_to_usecs(unsigned long nsecs)
1341 {
1342 	return nsecs / 1000;
1343 }
1344 
1345 /*
1346  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1347  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1348  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1349  * of strings in the order that the evals (enum) were defined.
1350  */
1351 #undef C
1352 #define C(a, b) b
1353 
1354 /* These must match the bit positions in trace_iterator_flags */
1355 static const char *trace_options[] = {
1356 	TRACE_FLAGS
1357 	NULL
1358 };
1359 
1360 static struct {
1361 	u64 (*func)(void);
1362 	const char *name;
1363 	int in_ns;		/* is this clock in nanoseconds? */
1364 } trace_clocks[] = {
1365 	{ trace_clock_local,		"local",	1 },
1366 	{ trace_clock_global,		"global",	1 },
1367 	{ trace_clock_counter,		"counter",	0 },
1368 	{ trace_clock_jiffies,		"uptime",	0 },
1369 	{ trace_clock,			"perf",		1 },
1370 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1371 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1372 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1373 	ARCH_TRACE_CLOCKS
1374 };
1375 
1376 bool trace_clock_in_ns(struct trace_array *tr)
1377 {
1378 	if (trace_clocks[tr->clock_id].in_ns)
1379 		return true;
1380 
1381 	return false;
1382 }
1383 
1384 /*
1385  * trace_parser_get_init - gets the buffer for trace parser
1386  */
1387 int trace_parser_get_init(struct trace_parser *parser, int size)
1388 {
1389 	memset(parser, 0, sizeof(*parser));
1390 
1391 	parser->buffer = kmalloc(size, GFP_KERNEL);
1392 	if (!parser->buffer)
1393 		return 1;
1394 
1395 	parser->size = size;
1396 	return 0;
1397 }
1398 
1399 /*
1400  * trace_parser_put - frees the buffer for trace parser
1401  */
1402 void trace_parser_put(struct trace_parser *parser)
1403 {
1404 	kfree(parser->buffer);
1405 	parser->buffer = NULL;
1406 }
1407 
1408 /*
1409  * trace_get_user - reads the user input string separated by space
1410  * (matched by isspace(ch))
1411  *
1412  * For each string found the 'struct trace_parser' is updated,
1413  * and the function returns.
1414  *
1415  * Returns number of bytes read.
1416  *
1417  * See kernel/trace/trace.h for 'struct trace_parser' details.
1418  */
1419 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1420 	size_t cnt, loff_t *ppos)
1421 {
1422 	char ch;
1423 	size_t read = 0;
1424 	ssize_t ret;
1425 
1426 	if (!*ppos)
1427 		trace_parser_clear(parser);
1428 
1429 	ret = get_user(ch, ubuf++);
1430 	if (ret)
1431 		goto out;
1432 
1433 	read++;
1434 	cnt--;
1435 
1436 	/*
1437 	 * If the parser is not finished with the last write,
1438 	 * continue reading the user input without skipping spaces.
1439 	 */
1440 	if (!parser->cont) {
1441 		/* skip white space */
1442 		while (cnt && isspace(ch)) {
1443 			ret = get_user(ch, ubuf++);
1444 			if (ret)
1445 				goto out;
1446 			read++;
1447 			cnt--;
1448 		}
1449 
1450 		parser->idx = 0;
1451 
1452 		/* only spaces were written */
1453 		if (isspace(ch) || !ch) {
1454 			*ppos += read;
1455 			ret = read;
1456 			goto out;
1457 		}
1458 	}
1459 
1460 	/* read the non-space input */
1461 	while (cnt && !isspace(ch) && ch) {
1462 		if (parser->idx < parser->size - 1)
1463 			parser->buffer[parser->idx++] = ch;
1464 		else {
1465 			ret = -EINVAL;
1466 			goto out;
1467 		}
1468 		ret = get_user(ch, ubuf++);
1469 		if (ret)
1470 			goto out;
1471 		read++;
1472 		cnt--;
1473 	}
1474 
1475 	/* We either got finished input or we have to wait for another call. */
1476 	if (isspace(ch) || !ch) {
1477 		parser->buffer[parser->idx] = 0;
1478 		parser->cont = false;
1479 	} else if (parser->idx < parser->size - 1) {
1480 		parser->cont = true;
1481 		parser->buffer[parser->idx++] = ch;
1482 		/* Make sure the parsed string always terminates with '\0'. */
1483 		parser->buffer[parser->idx] = 0;
1484 	} else {
1485 		ret = -EINVAL;
1486 		goto out;
1487 	}
1488 
1489 	*ppos += read;
1490 	ret = read;
1491 
1492 out:
1493 	return ret;
1494 }
1495 
1496 /* TODO add a seq_buf_to_buffer() */
1497 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1498 {
1499 	int len;
1500 
1501 	if (trace_seq_used(s) <= s->seq.readpos)
1502 		return -EBUSY;
1503 
1504 	len = trace_seq_used(s) - s->seq.readpos;
1505 	if (cnt > len)
1506 		cnt = len;
1507 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1508 
1509 	s->seq.readpos += cnt;
1510 	return cnt;
1511 }
1512 
1513 unsigned long __read_mostly	tracing_thresh;
1514 
1515 #ifdef CONFIG_TRACER_MAX_TRACE
1516 /*
1517  * Copy the new maximum trace into the separate maximum-trace
1518  * structure. (this way the maximum trace is permanently saved,
1519  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1520  */
1521 static void
1522 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1523 {
1524 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1525 	struct trace_buffer *max_buf = &tr->max_buffer;
1526 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1527 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1528 
1529 	max_buf->cpu = cpu;
1530 	max_buf->time_start = data->preempt_timestamp;
1531 
1532 	max_data->saved_latency = tr->max_latency;
1533 	max_data->critical_start = data->critical_start;
1534 	max_data->critical_end = data->critical_end;
1535 
1536 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1537 	max_data->pid = tsk->pid;
1538 	/*
1539 	 * If tsk == current, then use current_uid(), as that does not use
1540 	 * RCU. The irq tracer can be called out of RCU scope.
1541 	 */
1542 	if (tsk == current)
1543 		max_data->uid = current_uid();
1544 	else
1545 		max_data->uid = task_uid(tsk);
1546 
1547 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1548 	max_data->policy = tsk->policy;
1549 	max_data->rt_priority = tsk->rt_priority;
1550 
1551 	/* record this tasks comm */
1552 	tracing_record_cmdline(tsk);
1553 }
1554 
1555 /**
1556  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1557  * @tr: tracer
1558  * @tsk: the task with the latency
1559  * @cpu: The cpu that initiated the trace.
1560  * @cond_data: User data associated with a conditional snapshot
1561  *
1562  * Flip the buffers between the @tr and the max_tr and record information
1563  * about which task was the cause of this latency.
1564  */
1565 void
1566 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1567 	      void *cond_data)
1568 {
1569 	if (tr->stop_count)
1570 		return;
1571 
1572 	WARN_ON_ONCE(!irqs_disabled());
1573 
1574 	if (!tr->allocated_snapshot) {
1575 		/* Only the nop tracer should hit this when disabling */
1576 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1577 		return;
1578 	}
1579 
1580 	arch_spin_lock(&tr->max_lock);
1581 
1582 	/* Inherit the recordable setting from trace_buffer */
1583 	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1584 		ring_buffer_record_on(tr->max_buffer.buffer);
1585 	else
1586 		ring_buffer_record_off(tr->max_buffer.buffer);
1587 
1588 #ifdef CONFIG_TRACER_SNAPSHOT
1589 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1590 		goto out_unlock;
1591 #endif
1592 	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1593 
1594 	__update_max_tr(tr, tsk, cpu);
1595 
1596  out_unlock:
1597 	arch_spin_unlock(&tr->max_lock);
1598 }
1599 
1600 /**
1601  * update_max_tr_single - only copy one trace over, and reset the rest
1602  * @tr: tracer
1603  * @tsk: task with the latency
1604  * @cpu: the cpu of the buffer to copy.
1605  *
1606  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1607  */
1608 void
1609 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1610 {
1611 	int ret;
1612 
1613 	if (tr->stop_count)
1614 		return;
1615 
1616 	WARN_ON_ONCE(!irqs_disabled());
1617 	if (!tr->allocated_snapshot) {
1618 		/* Only the nop tracer should hit this when disabling */
1619 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1620 		return;
1621 	}
1622 
1623 	arch_spin_lock(&tr->max_lock);
1624 
1625 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1626 
1627 	if (ret == -EBUSY) {
1628 		/*
1629 		 * We failed to swap the buffer due to a commit taking
1630 		 * place on this CPU. We fail to record, but we reset
1631 		 * the max trace buffer (no one writes directly to it)
1632 		 * and flag that it failed.
1633 		 */
1634 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1635 			"Failed to swap buffers due to commit in progress\n");
1636 	}
1637 
1638 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1639 
1640 	__update_max_tr(tr, tsk, cpu);
1641 	arch_spin_unlock(&tr->max_lock);
1642 }
1643 #endif /* CONFIG_TRACER_MAX_TRACE */
1644 
1645 static int wait_on_pipe(struct trace_iterator *iter, int full)
1646 {
1647 	/* Iterators are static, they should be filled or empty */
1648 	if (trace_buffer_iter(iter, iter->cpu_file))
1649 		return 0;
1650 
1651 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1652 				full);
1653 }
1654 
1655 #ifdef CONFIG_FTRACE_STARTUP_TEST
1656 static bool selftests_can_run;
1657 
1658 struct trace_selftests {
1659 	struct list_head		list;
1660 	struct tracer			*type;
1661 };
1662 
1663 static LIST_HEAD(postponed_selftests);
1664 
1665 static int save_selftest(struct tracer *type)
1666 {
1667 	struct trace_selftests *selftest;
1668 
1669 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1670 	if (!selftest)
1671 		return -ENOMEM;
1672 
1673 	selftest->type = type;
1674 	list_add(&selftest->list, &postponed_selftests);
1675 	return 0;
1676 }
1677 
1678 static int run_tracer_selftest(struct tracer *type)
1679 {
1680 	struct trace_array *tr = &global_trace;
1681 	struct tracer *saved_tracer = tr->current_trace;
1682 	int ret;
1683 
1684 	if (!type->selftest || tracing_selftest_disabled)
1685 		return 0;
1686 
1687 	/*
1688 	 * If a tracer registers early in boot up (before scheduling is
1689 	 * initialized and such), then do not run its selftests yet.
1690 	 * Instead, run it a little later in the boot process.
1691 	 */
1692 	if (!selftests_can_run)
1693 		return save_selftest(type);
1694 
1695 	/*
1696 	 * Run a selftest on this tracer.
1697 	 * Here we reset the trace buffer, and set the current
1698 	 * tracer to be this tracer. The tracer can then run some
1699 	 * internal tracing to verify that everything is in order.
1700 	 * If we fail, we do not register this tracer.
1701 	 */
1702 	tracing_reset_online_cpus(&tr->trace_buffer);
1703 
1704 	tr->current_trace = type;
1705 
1706 #ifdef CONFIG_TRACER_MAX_TRACE
1707 	if (type->use_max_tr) {
1708 		/* If we expanded the buffers, make sure the max is expanded too */
1709 		if (ring_buffer_expanded)
1710 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1711 					   RING_BUFFER_ALL_CPUS);
1712 		tr->allocated_snapshot = true;
1713 	}
1714 #endif
1715 
1716 	/* the test is responsible for initializing and enabling */
1717 	pr_info("Testing tracer %s: ", type->name);
1718 	ret = type->selftest(type, tr);
1719 	/* the test is responsible for resetting too */
1720 	tr->current_trace = saved_tracer;
1721 	if (ret) {
1722 		printk(KERN_CONT "FAILED!\n");
1723 		/* Add the warning after printing 'FAILED' */
1724 		WARN_ON(1);
1725 		return -1;
1726 	}
1727 	/* Only reset on passing, to avoid touching corrupted buffers */
1728 	tracing_reset_online_cpus(&tr->trace_buffer);
1729 
1730 #ifdef CONFIG_TRACER_MAX_TRACE
1731 	if (type->use_max_tr) {
1732 		tr->allocated_snapshot = false;
1733 
1734 		/* Shrink the max buffer again */
1735 		if (ring_buffer_expanded)
1736 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1737 					   RING_BUFFER_ALL_CPUS);
1738 	}
1739 #endif
1740 
1741 	printk(KERN_CONT "PASSED\n");
1742 	return 0;
1743 }
1744 
1745 static __init int init_trace_selftests(void)
1746 {
1747 	struct trace_selftests *p, *n;
1748 	struct tracer *t, **last;
1749 	int ret;
1750 
1751 	selftests_can_run = true;
1752 
1753 	mutex_lock(&trace_types_lock);
1754 
1755 	if (list_empty(&postponed_selftests))
1756 		goto out;
1757 
1758 	pr_info("Running postponed tracer tests:\n");
1759 
1760 	tracing_selftest_running = true;
1761 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1762 		/* This loop can take minutes when sanitizers are enabled, so
1763 		 * let's make sure we allow RCU processing.
1764 		 */
1765 		cond_resched();
1766 		ret = run_tracer_selftest(p->type);
1767 		/* If the test fails, then warn and remove from available_tracers */
1768 		if (ret < 0) {
1769 			WARN(1, "tracer: %s failed selftest, disabling\n",
1770 			     p->type->name);
1771 			last = &trace_types;
1772 			for (t = trace_types; t; t = t->next) {
1773 				if (t == p->type) {
1774 					*last = t->next;
1775 					break;
1776 				}
1777 				last = &t->next;
1778 			}
1779 		}
1780 		list_del(&p->list);
1781 		kfree(p);
1782 	}
1783 	tracing_selftest_running = false;
1784 
1785  out:
1786 	mutex_unlock(&trace_types_lock);
1787 
1788 	return 0;
1789 }
1790 core_initcall(init_trace_selftests);
1791 #else
1792 static inline int run_tracer_selftest(struct tracer *type)
1793 {
1794 	return 0;
1795 }
1796 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1797 
1798 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1799 
1800 static void __init apply_trace_boot_options(void);
1801 
1802 /**
1803  * register_tracer - register a tracer with the ftrace system.
1804  * @type: the plugin for the tracer
1805  *
1806  * Register a new plugin tracer.
1807  */
1808 int __init register_tracer(struct tracer *type)
1809 {
1810 	struct tracer *t;
1811 	int ret = 0;
1812 
1813 	if (!type->name) {
1814 		pr_info("Tracer must have a name\n");
1815 		return -1;
1816 	}
1817 
1818 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1819 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1820 		return -1;
1821 	}
1822 
1823 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
1824 		pr_warning("Can not register tracer %s due to lockdown\n",
1825 			   type->name);
1826 		return -EPERM;
1827 	}
1828 
1829 	mutex_lock(&trace_types_lock);
1830 
1831 	tracing_selftest_running = true;
1832 
1833 	for (t = trace_types; t; t = t->next) {
1834 		if (strcmp(type->name, t->name) == 0) {
1835 			/* already found */
1836 			pr_info("Tracer %s already registered\n",
1837 				type->name);
1838 			ret = -1;
1839 			goto out;
1840 		}
1841 	}
1842 
1843 	if (!type->set_flag)
1844 		type->set_flag = &dummy_set_flag;
1845 	if (!type->flags) {
1846 		/* allocate a dummy tracer_flags */
1847 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1848 		if (!type->flags) {
1849 			ret = -ENOMEM;
1850 			goto out;
1851 		}
1852 		type->flags->val = 0;
1853 		type->flags->opts = dummy_tracer_opt;
1854 	} else
1855 		if (!type->flags->opts)
1856 			type->flags->opts = dummy_tracer_opt;
1857 
1858 	/* store the tracer for __set_tracer_option */
1859 	type->flags->trace = type;
1860 
1861 	ret = run_tracer_selftest(type);
1862 	if (ret < 0)
1863 		goto out;
1864 
1865 	type->next = trace_types;
1866 	trace_types = type;
1867 	add_tracer_options(&global_trace, type);
1868 
1869  out:
1870 	tracing_selftest_running = false;
1871 	mutex_unlock(&trace_types_lock);
1872 
1873 	if (ret || !default_bootup_tracer)
1874 		goto out_unlock;
1875 
1876 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1877 		goto out_unlock;
1878 
1879 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1880 	/* Do we want this tracer to start on bootup? */
1881 	tracing_set_tracer(&global_trace, type->name);
1882 	default_bootup_tracer = NULL;
1883 
1884 	apply_trace_boot_options();
1885 
1886 	/* disable other selftests, since this will break it. */
1887 	tracing_selftest_disabled = true;
1888 #ifdef CONFIG_FTRACE_STARTUP_TEST
1889 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1890 	       type->name);
1891 #endif
1892 
1893  out_unlock:
1894 	return ret;
1895 }
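
/*
 * Illustrative sketch (editorial addition, not part of the original source):
 * a built-in tracer typically registers itself from an initcall, roughly as
 * below. The "example" names and callbacks are hypothetical; see the real
 * tracers (e.g. the function tracer) for complete implementations.
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */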
1896 
1897 static void tracing_reset_cpu(struct trace_buffer *buf, int cpu)
1898 {
1899 	struct ring_buffer *buffer = buf->buffer;
1900 
1901 	if (!buffer)
1902 		return;
1903 
1904 	ring_buffer_record_disable(buffer);
1905 
1906 	/* Make sure all commits have finished */
1907 	synchronize_rcu();
1908 	ring_buffer_reset_cpu(buffer, cpu);
1909 
1910 	ring_buffer_record_enable(buffer);
1911 }
1912 
1913 void tracing_reset_online_cpus(struct trace_buffer *buf)
1914 {
1915 	struct ring_buffer *buffer = buf->buffer;
1916 	int cpu;
1917 
1918 	if (!buffer)
1919 		return;
1920 
1921 	ring_buffer_record_disable(buffer);
1922 
1923 	/* Make sure all commits have finished */
1924 	synchronize_rcu();
1925 
1926 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1927 
1928 	for_each_online_cpu(cpu)
1929 		ring_buffer_reset_cpu(buffer, cpu);
1930 
1931 	ring_buffer_record_enable(buffer);
1932 }
1933 
1934 /* Must have trace_types_lock held */
1935 void tracing_reset_all_online_cpus_unlocked(void)
1936 {
1937 	struct trace_array *tr;
1938 
1939 	lockdep_assert_held(&trace_types_lock);
1940 
1941 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1942 		if (!tr->clear_trace)
1943 			continue;
1944 		tr->clear_trace = false;
1945 		tracing_reset_online_cpus(&tr->trace_buffer);
1946 #ifdef CONFIG_TRACER_MAX_TRACE
1947 		tracing_reset_online_cpus(&tr->max_buffer);
1948 #endif
1949 	}
1950 }
1951 
1952 void tracing_reset_all_online_cpus(void)
1953 {
1954 	mutex_lock(&trace_types_lock);
1955 	tracing_reset_all_online_cpus_unlocked();
1956 	mutex_unlock(&trace_types_lock);
1957 }
1958 
1959 /*
1960  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
1961  * is the tgid last observed corresponding to pid=i.
1962  */
1963 static int *tgid_map;
1964 
1965 /* The maximum valid index into tgid_map. */
1966 static size_t tgid_map_max;
1967 
1968 #define SAVED_CMDLINES_DEFAULT 128
1969 #define NO_CMDLINE_MAP UINT_MAX
1970 /*
1971  * Preemption must be disabled before acquiring trace_cmdline_lock.
1972  * The various trace_arrays' max_lock must be acquired in a context
1973  * where interrupt is disabled.
1974  */
1975 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1976 struct saved_cmdlines_buffer {
1977 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1978 	unsigned *map_cmdline_to_pid;
1979 	unsigned cmdline_num;
1980 	int cmdline_idx;
1981 	char saved_cmdlines[];
1982 };
1983 static struct saved_cmdlines_buffer *savedcmd;
1984 
1985 static inline char *get_saved_cmdlines(int idx)
1986 {
1987 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1988 }
1989 
1990 static inline void set_cmdline(int idx, const char *cmdline)
1991 {
1992 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1993 }
1994 
1995 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
1996 {
1997 	int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
1998 
1999 	kfree(s->map_cmdline_to_pid);
2000 	kmemleak_free(s);
2001 	free_pages((unsigned long)s, order);
2002 }
2003 
2004 static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
2005 {
2006 	struct saved_cmdlines_buffer *s;
2007 	struct page *page;
2008 	int orig_size, size;
2009 	int order;
2010 
2011 	/* Figure out how much is needed to hold the given number of cmdlines */
2012 	orig_size = sizeof(*s) + val * TASK_COMM_LEN;
2013 	order = get_order(orig_size);
2014 	size = 1 << (order + PAGE_SHIFT);
2015 	page = alloc_pages(GFP_KERNEL, order);
2016 	if (!page)
2017 		return NULL;
2018 
2019 	s = page_address(page);
2020 	kmemleak_alloc(s, size, 1, GFP_KERNEL);
2021 	memset(s, 0, sizeof(*s));
2022 
2023 	/* Round up to actual allocation */
2024 	val = (size - sizeof(*s)) / TASK_COMM_LEN;
2025 	s->cmdline_num = val;
2026 
2027 	s->map_cmdline_to_pid = kmalloc_array(val,
2028 					      sizeof(*s->map_cmdline_to_pid),
2029 					      GFP_KERNEL);
2030 	if (!s->map_cmdline_to_pid) {
2031 		free_saved_cmdlines_buffer(s);
2032 		return NULL;
2033 	}
2034 
2035 	s->cmdline_idx = 0;
2036 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2037 	       sizeof(s->map_pid_to_cmdline));
2038 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2039 	       val * sizeof(*s->map_cmdline_to_pid));
2040 
2041 	return s;
2042 }
2043 
2044 static int trace_create_savedcmd(void)
2045 {
2046 	savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
2047 
2048 	return savedcmd ? 0 : -ENOMEM;
2049 }
2050 
2051 int is_tracing_stopped(void)
2052 {
2053 	return global_trace.stop_count;
2054 }
2055 
2056 /**
2057  * tracing_start - quick start of the tracer
2058  *
2059  * If tracing is enabled but was stopped by tracing_stop,
2060  * this will start the tracer back up.
2061  */
2062 void tracing_start(void)
2063 {
2064 	struct ring_buffer *buffer;
2065 	unsigned long flags;
2066 
2067 	if (tracing_disabled)
2068 		return;
2069 
2070 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2071 	if (--global_trace.stop_count) {
2072 		if (global_trace.stop_count < 0) {
2073 			/* Someone screwed up their debugging */
2074 			WARN_ON_ONCE(1);
2075 			global_trace.stop_count = 0;
2076 		}
2077 		goto out;
2078 	}
2079 
2080 	/* Prevent the buffers from switching */
2081 	arch_spin_lock(&global_trace.max_lock);
2082 
2083 	buffer = global_trace.trace_buffer.buffer;
2084 	if (buffer)
2085 		ring_buffer_record_enable(buffer);
2086 
2087 #ifdef CONFIG_TRACER_MAX_TRACE
2088 	buffer = global_trace.max_buffer.buffer;
2089 	if (buffer)
2090 		ring_buffer_record_enable(buffer);
2091 #endif
2092 
2093 	arch_spin_unlock(&global_trace.max_lock);
2094 
2095  out:
2096 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2097 }
2098 
2099 static void tracing_start_tr(struct trace_array *tr)
2100 {
2101 	struct ring_buffer *buffer;
2102 	unsigned long flags;
2103 
2104 	if (tracing_disabled)
2105 		return;
2106 
2107 	/* If global, we need to also start the max tracer */
2108 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2109 		return tracing_start();
2110 
2111 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2112 
2113 	if (--tr->stop_count) {
2114 		if (tr->stop_count < 0) {
2115 			/* Someone screwed up their debugging */
2116 			WARN_ON_ONCE(1);
2117 			tr->stop_count = 0;
2118 		}
2119 		goto out;
2120 	}
2121 
2122 	buffer = tr->trace_buffer.buffer;
2123 	if (buffer)
2124 		ring_buffer_record_enable(buffer);
2125 
2126  out:
2127 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2128 }
2129 
2130 /**
2131  * tracing_stop - quick stop of the tracer
2132  *
2133  * Light weight way to stop tracing. Use in conjunction with
2134  * tracing_start.
2135  */
2136 void tracing_stop(void)
2137 {
2138 	struct ring_buffer *buffer;
2139 	unsigned long flags;
2140 
2141 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2142 	if (global_trace.stop_count++)
2143 		goto out;
2144 
2145 	/* Prevent the buffers from switching */
2146 	arch_spin_lock(&global_trace.max_lock);
2147 
2148 	buffer = global_trace.trace_buffer.buffer;
2149 	if (buffer)
2150 		ring_buffer_record_disable(buffer);
2151 
2152 #ifdef CONFIG_TRACER_MAX_TRACE
2153 	buffer = global_trace.max_buffer.buffer;
2154 	if (buffer)
2155 		ring_buffer_record_disable(buffer);
2156 #endif
2157 
2158 	arch_spin_unlock(&global_trace.max_lock);
2159 
2160  out:
2161 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2162 }
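
/*
 * Illustrative sketch (editorial addition, not part of the original source):
 * tracing_stop() and tracing_start() nest via stop_count, so a debugging path
 * can bracket a section it does not want recorded and still cooperate with
 * other callers. examine_buffers() is a hypothetical placeholder.
 *
 *	tracing_stop();
 *	examine_buffers();	// runs with ring buffer recording disabled
 *	tracing_start();
 */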
2163 
2164 static void tracing_stop_tr(struct trace_array *tr)
2165 {
2166 	struct ring_buffer *buffer;
2167 	unsigned long flags;
2168 
2169 	/* If global, we need to also stop the max tracer */
2170 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2171 		return tracing_stop();
2172 
2173 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2174 	if (tr->stop_count++)
2175 		goto out;
2176 
2177 	buffer = tr->trace_buffer.buffer;
2178 	if (buffer)
2179 		ring_buffer_record_disable(buffer);
2180 
2181  out:
2182 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2183 }
2184 
2185 static int trace_save_cmdline(struct task_struct *tsk)
2186 {
2187 	unsigned tpid, idx;
2188 
2189 	/* treat recording of idle task as a success */
2190 	if (!tsk->pid)
2191 		return 1;
2192 
2193 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2194 
2195 	/*
2196 	 * It's not the end of the world if we don't get
2197 	 * the lock, but we also don't want to spin
2198 	 * nor do we want to disable interrupts,
2199 	 * so if we miss here, then better luck next time.
2200 	 *
2201 	 * This is called within the scheduler and wakeup paths, so interrupts
2202 	 * had better be disabled and the run queue lock held.
2203 	 */
2204 	if (!arch_spin_trylock(&trace_cmdline_lock))
2205 		return 0;
2206 
2207 	idx = savedcmd->map_pid_to_cmdline[tpid];
2208 	if (idx == NO_CMDLINE_MAP) {
2209 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2210 
2211 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2212 		savedcmd->cmdline_idx = idx;
2213 	}
2214 
2215 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2216 	set_cmdline(idx, tsk->comm);
2217 
2218 	arch_spin_unlock(&trace_cmdline_lock);
2219 
2220 	return 1;
2221 }
2222 
2223 static void __trace_find_cmdline(int pid, char comm[])
2224 {
2225 	unsigned map;
2226 	int tpid;
2227 
2228 	if (!pid) {
2229 		strcpy(comm, "<idle>");
2230 		return;
2231 	}
2232 
2233 	if (WARN_ON_ONCE(pid < 0)) {
2234 		strcpy(comm, "<XXX>");
2235 		return;
2236 	}
2237 
2238 	tpid = pid & (PID_MAX_DEFAULT - 1);
2239 	map = savedcmd->map_pid_to_cmdline[tpid];
2240 	if (map != NO_CMDLINE_MAP) {
2241 		tpid = savedcmd->map_cmdline_to_pid[map];
2242 		if (tpid == pid) {
2243 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2244 			return;
2245 		}
2246 	}
2247 	strcpy(comm, "<...>");
2248 }
2249 
2250 void trace_find_cmdline(int pid, char comm[])
2251 {
2252 	preempt_disable();
2253 	arch_spin_lock(&trace_cmdline_lock);
2254 
2255 	__trace_find_cmdline(pid, comm);
2256 
2257 	arch_spin_unlock(&trace_cmdline_lock);
2258 	preempt_enable();
2259 }
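
/*
 * Illustrative sketch (editorial addition, not part of the original source):
 * output code resolves a recorded pid back to a comm with a stack buffer of
 * TASK_COMM_LEN bytes, roughly as the print helpers in trace_output.c do:
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
 */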
2260 
2261 static int *trace_find_tgid_ptr(int pid)
2262 {
2263 	/*
2264 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2265 	 * if we observe a non-NULL tgid_map then we also observe the correct
2266 	 * tgid_map_max.
2267 	 */
2268 	int *map = smp_load_acquire(&tgid_map);
2269 
2270 	if (unlikely(!map || pid > tgid_map_max))
2271 		return NULL;
2272 
2273 	return &map[pid];
2274 }
2275 
2276 int trace_find_tgid(int pid)
2277 {
2278 	int *ptr = trace_find_tgid_ptr(pid);
2279 
2280 	return ptr ? *ptr : 0;
2281 }
2282 
2283 static int trace_save_tgid(struct task_struct *tsk)
2284 {
2285 	int *ptr;
2286 
2287 	/* treat recording of idle task as a success */
2288 	if (!tsk->pid)
2289 		return 1;
2290 
2291 	ptr = trace_find_tgid_ptr(tsk->pid);
2292 	if (!ptr)
2293 		return 0;
2294 
2295 	*ptr = tsk->tgid;
2296 	return 1;
2297 }
2298 
2299 static bool tracing_record_taskinfo_skip(int flags)
2300 {
2301 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2302 		return true;
2303 	if (!__this_cpu_read(trace_taskinfo_save))
2304 		return true;
2305 	return false;
2306 }
2307 
2308 /**
2309  * tracing_record_taskinfo - record the task info of a task
2310  *
2311  * @task:  task to record
2312  * @flags: TRACE_RECORD_CMDLINE for recording comm
2313  *         TRACE_RECORD_TGID for recording tgid
2314  */
2315 void tracing_record_taskinfo(struct task_struct *task, int flags)
2316 {
2317 	bool done;
2318 
2319 	if (tracing_record_taskinfo_skip(flags))
2320 		return;
2321 
2322 	/*
2323 	 * Record as much task information as possible. If some fail, continue
2324 	 * to try to record the others.
2325 	 */
2326 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2327 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2328 
2329 	/* If recording any information failed, retry again soon. */
2330 	if (!done)
2331 		return;
2332 
2333 	__this_cpu_write(trace_taskinfo_save, false);
2334 }
2335 
2336 /**
2337  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2338  *
2339  * @prev: previous task during sched_switch
2340  * @next: next task during sched_switch
2341  * @flags: TRACE_RECORD_CMDLINE for recording comm
2342  *         TRACE_RECORD_TGID for recording tgid
2343  */
2344 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2345 					  struct task_struct *next, int flags)
2346 {
2347 	bool done;
2348 
2349 	if (tracing_record_taskinfo_skip(flags))
2350 		return;
2351 
2352 	/*
2353 	 * Record as much task information as possible. If some fail, continue
2354 	 * to try to record the others.
2355 	 */
2356 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2357 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2358 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2359 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2360 
2361 	/* If recording any information failed, retry again soon. */
2362 	if (!done)
2363 		return;
2364 
2365 	__this_cpu_write(trace_taskinfo_save, false);
2366 }
2367 
2368 /* Helpers to record a specific task information */
2369 void tracing_record_cmdline(struct task_struct *task)
2370 {
2371 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2372 }
2373 
2374 void tracing_record_tgid(struct task_struct *task)
2375 {
2376 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2377 }
2378 
2379 /*
2380  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2381  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2382  * simplifies those functions and keeps them in sync.
2383  */
2384 enum print_line_t trace_handle_return(struct trace_seq *s)
2385 {
2386 	return trace_seq_has_overflowed(s) ?
2387 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2388 }
2389 EXPORT_SYMBOL_GPL(trace_handle_return);
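
/*
 * Illustrative sketch (editorial addition, not part of the original source):
 * an event's output callback writes into iter->seq and returns through this
 * helper, so a trace_seq overflow is reported as TRACE_TYPE_PARTIAL_LINE.
 * The "example" event is hypothetical.
 *
 *	static enum print_line_t example_event_print(struct trace_iterator *iter,
 *						     int flags,
 *						     struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "example: %d\n", 0);
 *		return trace_handle_return(&iter->seq);
 *	}
 */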
2390 
2391 void
2392 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2393 			     unsigned long flags, int pc)
2394 {
2395 	struct task_struct *tsk = current;
2396 
2397 	entry->preempt_count		= pc & 0xff;
2398 	entry->pid			= (tsk) ? tsk->pid : 0;
2399 	entry->type			= type;
2400 	entry->flags =
2401 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2402 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2403 #else
2404 		TRACE_FLAG_IRQS_NOSUPPORT |
2405 #endif
2406 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2407 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2408 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2409 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2410 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2411 }
2412 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2413 
2414 struct ring_buffer_event *
2415 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2416 			  int type,
2417 			  unsigned long len,
2418 			  unsigned long flags, int pc)
2419 {
2420 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2421 }
2422 
2423 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2424 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2425 static int trace_buffered_event_ref;
2426 
2427 /**
2428  * trace_buffered_event_enable - enable buffering events
2429  *
2430  * When events are being filtered, it is quicker to use a temporary
2431  * buffer to write the event data into if there's a likely chance
2432  * that it will not be committed. The discard of the ring buffer
2433  * is not as fast as committing, and is much slower than copying
2434  * a commit.
2435  *
2436  * When an event is to be filtered, per-CPU buffers are allocated to
2437  * write the event data into; if the event is filtered and discarded,
2438  * it is simply dropped, otherwise the entire data is committed
2439  * in one shot.
2440  */
2441 void trace_buffered_event_enable(void)
2442 {
2443 	struct ring_buffer_event *event;
2444 	struct page *page;
2445 	int cpu;
2446 
2447 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2448 
2449 	if (trace_buffered_event_ref++)
2450 		return;
2451 
2452 	for_each_tracing_cpu(cpu) {
2453 		page = alloc_pages_node(cpu_to_node(cpu),
2454 					GFP_KERNEL | __GFP_NORETRY, 0);
2455 		/* This is just an optimization and can handle failures */
2456 		if (!page) {
2457 			pr_err("Failed to allocate event buffer\n");
2458 			break;
2459 		}
2460 
2461 		event = page_address(page);
2462 		memset(event, 0, sizeof(*event));
2463 
2464 		per_cpu(trace_buffered_event, cpu) = event;
2465 
2466 		preempt_disable();
2467 		if (cpu == smp_processor_id() &&
2468 		    this_cpu_read(trace_buffered_event) !=
2469 		    per_cpu(trace_buffered_event, cpu))
2470 			WARN_ON_ONCE(1);
2471 		preempt_enable();
2472 	}
2473 }
2474 
2475 static void enable_trace_buffered_event(void *data)
2476 {
2477 	/* Probably not needed, but do it anyway */
2478 	smp_rmb();
2479 	this_cpu_dec(trace_buffered_event_cnt);
2480 }
2481 
2482 static void disable_trace_buffered_event(void *data)
2483 {
2484 	this_cpu_inc(trace_buffered_event_cnt);
2485 }
2486 
2487 /**
2488  * trace_buffered_event_disable - disable buffering events
2489  *
2490  * When a filter is removed, it is faster to not use the buffered
2491  * events, and to commit directly into the ring buffer. Free up
2492  * the temp buffers when there are no more users. This requires
2493  * special synchronization with current events.
2494  */
2495 void trace_buffered_event_disable(void)
2496 {
2497 	int cpu;
2498 
2499 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2500 
2501 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2502 		return;
2503 
2504 	if (--trace_buffered_event_ref)
2505 		return;
2506 
2507 	/* For each CPU, set the buffer as used. */
2508 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2509 			 NULL, true);
2510 
2511 	/* Wait for all current users to finish */
2512 	synchronize_rcu();
2513 
2514 	for_each_tracing_cpu(cpu) {
2515 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2516 		per_cpu(trace_buffered_event, cpu) = NULL;
2517 	}
2518 
2519 	/*
2520 	 * Wait for all CPUs that potentially started checking if they can use
2521 	 * their event buffer only after the previous synchronize_rcu() call and
2522 	 * they still read a valid pointer from trace_buffered_event. It must be
2523 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2524 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2525 	 */
2526 	synchronize_rcu();
2527 
2528 	/* For each CPU, relinquish the buffer */
2529 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2530 			 true);
2531 }
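
/*
 * Illustrative sketch (editorial addition, not part of the original source):
 * the enable/disable pair is reference counted and, as the WARN_ON_ONCE()
 * checks above show, each call must run under event_mutex. A caller that
 * attaches and later removes a filter uses it roughly like this:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();		// filter attached
 *	mutex_unlock(&event_mutex);
 *	...
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();		// filter removed
 *	mutex_unlock(&event_mutex);
 */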
2532 
2533 static struct ring_buffer *temp_buffer;
2534 
2535 struct ring_buffer_event *
2536 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2537 			  struct trace_event_file *trace_file,
2538 			  int type, unsigned long len,
2539 			  unsigned long flags, int pc)
2540 {
2541 	struct ring_buffer_event *entry;
2542 	int val;
2543 
2544 	*current_rb = trace_file->tr->trace_buffer.buffer;
2545 
2546 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2547 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2548 	    (entry = this_cpu_read(trace_buffered_event))) {
2549 		/* Try to use the per cpu buffer first */
2550 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2551 		if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2552 			trace_event_setup(entry, type, flags, pc);
2553 			entry->array[0] = len;
2554 			return entry;
2555 		}
2556 		this_cpu_dec(trace_buffered_event_cnt);
2557 	}
2558 
2559 	entry = __trace_buffer_lock_reserve(*current_rb,
2560 					    type, len, flags, pc);
2561 	/*
2562 	 * If tracing is off, but we have triggers enabled
2563 	 * we still need to look at the event data. Use the temp_buffer
2564 	 * to store the trace event for the trigger to use. It's recursion
2565 	 * safe and will not be recorded anywhere.
2566 	 */
2567 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2568 		*current_rb = temp_buffer;
2569 		entry = __trace_buffer_lock_reserve(*current_rb,
2570 						    type, len, flags, pc);
2571 	}
2572 	return entry;
2573 }
2574 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2575 
2576 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2577 static DEFINE_MUTEX(tracepoint_printk_mutex);
2578 
2579 static void output_printk(struct trace_event_buffer *fbuffer)
2580 {
2581 	struct trace_event_call *event_call;
2582 	struct trace_event *event;
2583 	unsigned long flags;
2584 	struct trace_iterator *iter = tracepoint_print_iter;
2585 
2586 	/* We should never get here if iter is NULL */
2587 	if (WARN_ON_ONCE(!iter))
2588 		return;
2589 
2590 	event_call = fbuffer->trace_file->event_call;
2591 	if (!event_call || !event_call->event.funcs ||
2592 	    !event_call->event.funcs->trace)
2593 		return;
2594 
2595 	event = &fbuffer->trace_file->event_call->event;
2596 
2597 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2598 	trace_seq_init(&iter->seq);
2599 	iter->ent = fbuffer->entry;
2600 	event_call->event.funcs->trace(iter, 0, event);
2601 	trace_seq_putc(&iter->seq, 0);
2602 	printk("%s", iter->seq.buffer);
2603 
2604 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2605 }
2606 
2607 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2608 			     void __user *buffer, size_t *lenp,
2609 			     loff_t *ppos)
2610 {
2611 	int save_tracepoint_printk;
2612 	int ret;
2613 
2614 	mutex_lock(&tracepoint_printk_mutex);
2615 	save_tracepoint_printk = tracepoint_printk;
2616 
2617 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2618 
2619 	/*
2620 	 * This will force exiting early, as tracepoint_printk
2621 	 * is always zero when tracepoint_print_iter is not allocated
2622 	 */
2623 	if (!tracepoint_print_iter)
2624 		tracepoint_printk = 0;
2625 
2626 	if (save_tracepoint_printk == tracepoint_printk)
2627 		goto out;
2628 
2629 	if (tracepoint_printk)
2630 		static_key_enable(&tracepoint_printk_key.key);
2631 	else
2632 		static_key_disable(&tracepoint_printk_key.key);
2633 
2634  out:
2635 	mutex_unlock(&tracepoint_printk_mutex);
2636 
2637 	return ret;
2638 }
2639 
2640 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2641 {
2642 	if (static_key_false(&tracepoint_printk_key.key))
2643 		output_printk(fbuffer);
2644 
2645 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2646 				    fbuffer->event, fbuffer->entry,
2647 				    fbuffer->flags, fbuffer->pc);
2648 }
2649 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2650 
2651 /*
2652  * Skip 3:
2653  *
2654  *   trace_buffer_unlock_commit_regs()
2655  *   trace_event_buffer_commit()
2656  *   trace_event_raw_event_xxx()
2657  */
2658 # define STACK_SKIP 3
2659 
2660 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2661 				     struct ring_buffer *buffer,
2662 				     struct ring_buffer_event *event,
2663 				     unsigned long flags, int pc,
2664 				     struct pt_regs *regs)
2665 {
2666 	__buffer_unlock_commit(buffer, event);
2667 
2668 	/*
2669 	 * If regs is not set, then skip the necessary functions.
2670 	 * Note, we can still get here via blktrace, wakeup tracer
2671 	 * and mmiotrace, but that's ok if they lose a function or
2672 	 * two. They are not that meaningful.
2673 	 */
2674 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2675 	ftrace_trace_userstack(tr, buffer, flags, pc);
2676 }
2677 
2678 /*
2679  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2680  */
2681 void
2682 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2683 				   struct ring_buffer_event *event)
2684 {
2685 	__buffer_unlock_commit(buffer, event);
2686 }
2687 
2688 static void
2689 trace_process_export(struct trace_export *export,
2690 	       struct ring_buffer_event *event)
2691 {
2692 	struct trace_entry *entry;
2693 	unsigned int size = 0;
2694 
2695 	entry = ring_buffer_event_data(event);
2696 	size = ring_buffer_event_length(event);
2697 	export->write(export, entry, size);
2698 }
2699 
2700 static DEFINE_MUTEX(ftrace_export_lock);
2701 
2702 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2703 
2704 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2705 
2706 static inline void ftrace_exports_enable(void)
2707 {
2708 	static_branch_enable(&ftrace_exports_enabled);
2709 }
2710 
2711 static inline void ftrace_exports_disable(void)
2712 {
2713 	static_branch_disable(&ftrace_exports_enabled);
2714 }
2715 
2716 static void ftrace_exports(struct ring_buffer_event *event)
2717 {
2718 	struct trace_export *export;
2719 
2720 	preempt_disable_notrace();
2721 
2722 	export = rcu_dereference_raw_check(ftrace_exports_list);
2723 	while (export) {
2724 		trace_process_export(export, event);
2725 		export = rcu_dereference_raw_check(export->next);
2726 	}
2727 
2728 	preempt_enable_notrace();
2729 }
2730 
2731 static inline void
2732 add_trace_export(struct trace_export **list, struct trace_export *export)
2733 {
2734 	rcu_assign_pointer(export->next, *list);
2735 	/*
2736 	 * We are entering export into the list but another
2737 	 * CPU might be walking that list. We need to make sure
2738 	 * the export->next pointer is valid before another CPU sees
2739 	 * the export pointer included into the list.
2740 	 */
2741 	rcu_assign_pointer(*list, export);
2742 }
2743 
2744 static inline int
2745 rm_trace_export(struct trace_export **list, struct trace_export *export)
2746 {
2747 	struct trace_export **p;
2748 
2749 	for (p = list; *p != NULL; p = &(*p)->next)
2750 		if (*p == export)
2751 			break;
2752 
2753 	if (*p != export)
2754 		return -1;
2755 
2756 	rcu_assign_pointer(*p, (*p)->next);
2757 
2758 	return 0;
2759 }
2760 
2761 static inline void
2762 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2763 {
2764 	if (*list == NULL)
2765 		ftrace_exports_enable();
2766 
2767 	add_trace_export(list, export);
2768 }
2769 
2770 static inline int
2771 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2772 {
2773 	int ret;
2774 
2775 	ret = rm_trace_export(list, export);
2776 	if (*list == NULL)
2777 		ftrace_exports_disable();
2778 
2779 	return ret;
2780 }
2781 
2782 int register_ftrace_export(struct trace_export *export)
2783 {
2784 	if (WARN_ON_ONCE(!export->write))
2785 		return -1;
2786 
2787 	mutex_lock(&ftrace_export_lock);
2788 
2789 	add_ftrace_export(&ftrace_exports_list, export);
2790 
2791 	mutex_unlock(&ftrace_export_lock);
2792 
2793 	return 0;
2794 }
2795 EXPORT_SYMBOL_GPL(register_ftrace_export);
2796 
2797 int unregister_ftrace_export(struct trace_export *export)
2798 {
2799 	int ret;
2800 
2801 	mutex_lock(&ftrace_export_lock);
2802 
2803 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2804 
2805 	mutex_unlock(&ftrace_export_lock);
2806 
2807 	return ret;
2808 }
2809 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
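
/*
 * Illustrative sketch (editorial addition, not part of the original source):
 * an exporter fills in a struct trace_export with a write() callback and
 * registers it; every function trace event is then handed to that callback
 * through ftrace_exports(). The "example" names are hypothetical.
 *
 *	static void example_export_write(struct trace_export *export,
 *					 const void *entry, unsigned int size)
 *	{
 *		// forward the raw trace entry somewhere of the exporter's choosing
 *	}
 *
 *	static struct trace_export example_export = {
 *		.write	= example_export_write,
 *	};
 *
 *	register_ftrace_export(&example_export);
 *	...
 *	unregister_ftrace_export(&example_export);
 */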
2810 
2811 void
2812 trace_function(struct trace_array *tr,
2813 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2814 	       int pc)
2815 {
2816 	struct trace_event_call *call = &event_function;
2817 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2818 	struct ring_buffer_event *event;
2819 	struct ftrace_entry *entry;
2820 
2821 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2822 					    flags, pc);
2823 	if (!event)
2824 		return;
2825 	entry	= ring_buffer_event_data(event);
2826 	entry->ip			= ip;
2827 	entry->parent_ip		= parent_ip;
2828 
2829 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2830 		if (static_branch_unlikely(&ftrace_exports_enabled))
2831 			ftrace_exports(event);
2832 		__buffer_unlock_commit(buffer, event);
2833 	}
2834 }
2835 
2836 #ifdef CONFIG_STACKTRACE
2837 
2838 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2839 #define FTRACE_KSTACK_NESTING	4
2840 
2841 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2842 
2843 struct ftrace_stack {
2844 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2845 };
2846 
2847 
2848 struct ftrace_stacks {
2849 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2850 };
2851 
2852 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2853 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2854 
2855 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2856 				 unsigned long flags,
2857 				 int skip, int pc, struct pt_regs *regs)
2858 {
2859 	struct trace_event_call *call = &event_kernel_stack;
2860 	struct ring_buffer_event *event;
2861 	unsigned int size, nr_entries;
2862 	struct ftrace_stack *fstack;
2863 	struct stack_entry *entry;
2864 	int stackidx;
2865 
2866 	/*
2867 	 * Add one, for this function and the call to stack_trace_save().
2868 	 * If regs is set, then these functions will not be in the way.
2869 	 */
2870 #ifndef CONFIG_UNWINDER_ORC
2871 	if (!regs)
2872 		skip++;
2873 #endif
2874 
2875 	/*
2876 	 * Since events can happen in NMIs there's no safe way to
2877 	 * share a single per cpu stack. Instead, each context (normal,
2878 	 * softirq, irq, NMI) reserves its own slot in ftrace_stacks, up to
2879 	 * FTRACE_KSTACK_NESTING levels deep.
2880 	 */
2881 	preempt_disable_notrace();
2882 
2883 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2884 
2885 	/* This should never happen. If it does, yell once and skip */
2886 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2887 		goto out;
2888 
2889 	/*
2890 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2891 	 * interrupt will either see the value pre increment or post
2892 	 * increment. If the interrupt happens pre increment it will have
2893 	 * restored the counter when it returns.  We just need a barrier to
2894 	 * keep gcc from moving things around.
2895 	 */
2896 	barrier();
2897 
2898 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2899 	size = ARRAY_SIZE(fstack->calls);
2900 
2901 	if (regs) {
2902 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2903 						   size, skip);
2904 	} else {
2905 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2906 	}
2907 
2908 	size = nr_entries * sizeof(unsigned long);
2909 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2910 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
2911 				    flags, pc);
2912 	if (!event)
2913 		goto out;
2914 	entry = ring_buffer_event_data(event);
2915 
2916 	memcpy(&entry->caller, fstack->calls, size);
2917 	entry->size = nr_entries;
2918 
2919 	if (!call_filter_check_discard(call, entry, buffer, event))
2920 		__buffer_unlock_commit(buffer, event);
2921 
2922  out:
2923 	/* Again, don't let gcc optimize things here */
2924 	barrier();
2925 	__this_cpu_dec(ftrace_stack_reserve);
2926 	preempt_enable_notrace();
2927 
2928 }
2929 
2930 static inline void ftrace_trace_stack(struct trace_array *tr,
2931 				      struct ring_buffer *buffer,
2932 				      unsigned long flags,
2933 				      int skip, int pc, struct pt_regs *regs)
2934 {
2935 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2936 		return;
2937 
2938 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2939 }
2940 
2941 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2942 		   int pc)
2943 {
2944 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2945 
2946 	if (rcu_is_watching()) {
2947 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2948 		return;
2949 	}
2950 
2951 	/*
2952 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2953 	 * but if the above rcu_is_watching() failed, then the NMI
2954 	 * triggered someplace critical, and rcu_irq_enter() should
2955 	 * not be called from NMI.
2956 	 */
2957 	if (unlikely(in_nmi()))
2958 		return;
2959 
2960 	rcu_irq_enter_irqson();
2961 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2962 	rcu_irq_exit_irqson();
2963 }
2964 
2965 /**
2966  * trace_dump_stack - record a stack back trace in the trace buffer
2967  * @skip: Number of functions to skip (helper handlers)
2968  */
2969 void trace_dump_stack(int skip)
2970 {
2971 	unsigned long flags;
2972 
2973 	if (tracing_disabled || tracing_selftest_running)
2974 		return;
2975 
2976 	local_save_flags(flags);
2977 
2978 #ifndef CONFIG_UNWINDER_ORC
2979 	/* Skip 1 to skip this function. */
2980 	skip++;
2981 #endif
2982 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2983 			     flags, skip, preempt_count(), NULL);
2984 }
2985 EXPORT_SYMBOL_GPL(trace_dump_stack);
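
/*
 * Illustrative sketch (editorial addition, not part of the original source):
 * a debug site can drop a kernel stack trace into the ring buffer without
 * stopping tracing; passing 0 records the caller's frame and everything
 * above it. The predicate below is hypothetical.
 *
 *	if (unexpected_condition)
 *		trace_dump_stack(0);
 */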
2986 
2987 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
2988 static DEFINE_PER_CPU(int, user_stack_count);
2989 
2990 static void
2991 ftrace_trace_userstack(struct trace_array *tr,
2992 		       struct ring_buffer *buffer, unsigned long flags, int pc)
2993 {
2994 	struct trace_event_call *call = &event_user_stack;
2995 	struct ring_buffer_event *event;
2996 	struct userstack_entry *entry;
2997 
2998 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
2999 		return;
3000 
3001 	/*
3002 	 * NMIs cannot handle page faults, even with fixups.
3003 	 * Saving the user stack can (and often does) fault.
3004 	 */
3005 	if (unlikely(in_nmi()))
3006 		return;
3007 
3008 	/*
3009 	 * prevent recursion, since the user stack tracing may
3010 	 * trigger other kernel events.
3011 	 */
3012 	preempt_disable();
3013 	if (__this_cpu_read(user_stack_count))
3014 		goto out;
3015 
3016 	__this_cpu_inc(user_stack_count);
3017 
3018 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3019 					    sizeof(*entry), flags, pc);
3020 	if (!event)
3021 		goto out_drop_count;
3022 	entry	= ring_buffer_event_data(event);
3023 
3024 	entry->tgid		= current->tgid;
3025 	memset(&entry->caller, 0, sizeof(entry->caller));
3026 
3027 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3028 	if (!call_filter_check_discard(call, entry, buffer, event))
3029 		__buffer_unlock_commit(buffer, event);
3030 
3031  out_drop_count:
3032 	__this_cpu_dec(user_stack_count);
3033  out:
3034 	preempt_enable();
3035 }
3036 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3037 static void ftrace_trace_userstack(struct trace_array *tr,
3038 				   struct ring_buffer *buffer,
3039 				   unsigned long flags, int pc)
3040 {
3041 }
3042 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3043 
3044 #endif /* CONFIG_STACKTRACE */
3045 
3046 /* created for use with alloc_percpu */
3047 struct trace_buffer_struct {
3048 	int nesting;
3049 	char buffer[4][TRACE_BUF_SIZE];
3050 };
3051 
3052 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3053 
3054 /*
3055  * This allows for lockless recording.  If we're nested too deeply, then
3056  * this returns NULL.
3057  */
3058 static char *get_trace_buf(void)
3059 {
3060 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3061 
3062 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3063 		return NULL;
3064 
3065 	buffer->nesting++;
3066 
3067 	/* Interrupts must see nesting incremented before we use the buffer */
3068 	barrier();
3069 	return &buffer->buffer[buffer->nesting - 1][0];
3070 }
3071 
3072 static void put_trace_buf(void)
3073 {
3074 	/* Don't let the decrement of nesting leak before this */
3075 	barrier();
3076 	this_cpu_dec(trace_percpu_buffer->nesting);
3077 }
3078 
3079 static int alloc_percpu_trace_buffer(void)
3080 {
3081 	struct trace_buffer_struct __percpu *buffers;
3082 
3083 	buffers = alloc_percpu(struct trace_buffer_struct);
3084 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3085 		return -ENOMEM;
3086 
3087 	trace_percpu_buffer = buffers;
3088 	return 0;
3089 }
3090 
3091 static int buffers_allocated;
3092 
3093 void trace_printk_init_buffers(void)
3094 {
3095 	if (buffers_allocated)
3096 		return;
3097 
3098 	if (alloc_percpu_trace_buffer())
3099 		return;
3100 
3101 	/* trace_printk() is for debug use only. Don't use it in production. */
3102 
3103 	pr_warn("\n");
3104 	pr_warn("**********************************************************\n");
3105 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3106 	pr_warn("**                                                      **\n");
3107 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3108 	pr_warn("**                                                      **\n");
3109 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3110 	pr_warn("** unsafe for production use.                           **\n");
3111 	pr_warn("**                                                      **\n");
3112 	pr_warn("** If you see this message and you are not debugging    **\n");
3113 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3114 	pr_warn("**                                                      **\n");
3115 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3116 	pr_warn("**********************************************************\n");
3117 
3118 	/* Expand the buffers to set size */
3119 	tracing_update_buffers();
3120 
3121 	buffers_allocated = 1;
3122 
3123 	/*
3124 	 * trace_printk_init_buffers() can be called by modules.
3125 	 * If that happens, then we need to start cmdline recording
3126 	 * directly here. If the global_trace.buffer is already
3127 	 * allocated here, then this was called by module code.
3128 	 */
3129 	if (global_trace.trace_buffer.buffer)
3130 		tracing_start_cmdline_record();
3131 }
3132 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3133 
3134 void trace_printk_start_comm(void)
3135 {
3136 	/* Start tracing comms if trace printk is set */
3137 	if (!buffers_allocated)
3138 		return;
3139 	tracing_start_cmdline_record();
3140 }
3141 
3142 static void trace_printk_start_stop_comm(int enabled)
3143 {
3144 	if (!buffers_allocated)
3145 		return;
3146 
3147 	if (enabled)
3148 		tracing_start_cmdline_record();
3149 	else
3150 		tracing_stop_cmdline_record();
3151 }
3152 
3153 /**
3154  * trace_vbprintk - write binary msg to tracing buffer
3155  * @ip:    The address of the caller
3156  * @fmt:   The string format to write to the buffer
3157  * @args:  Arguments for @fmt
3158  */
3159 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3160 {
3161 	struct trace_event_call *call = &event_bprint;
3162 	struct ring_buffer_event *event;
3163 	struct ring_buffer *buffer;
3164 	struct trace_array *tr = &global_trace;
3165 	struct bprint_entry *entry;
3166 	unsigned long flags;
3167 	char *tbuffer;
3168 	int len = 0, size, pc;
3169 
3170 	if (unlikely(tracing_selftest_running || tracing_disabled))
3171 		return 0;
3172 
3173 	/* Don't pollute graph traces with trace_vprintk internals */
3174 	pause_graph_tracing();
3175 
3176 	pc = preempt_count();
3177 	preempt_disable_notrace();
3178 
3179 	tbuffer = get_trace_buf();
3180 	if (!tbuffer) {
3181 		len = 0;
3182 		goto out_nobuffer;
3183 	}
3184 
3185 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3186 
3187 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3188 		goto out;
3189 
3190 	local_save_flags(flags);
3191 	size = sizeof(*entry) + sizeof(u32) * len;
3192 	buffer = tr->trace_buffer.buffer;
3193 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3194 					    flags, pc);
3195 	if (!event)
3196 		goto out;
3197 	entry = ring_buffer_event_data(event);
3198 	entry->ip			= ip;
3199 	entry->fmt			= fmt;
3200 
3201 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3202 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3203 		__buffer_unlock_commit(buffer, event);
3204 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3205 	}
3206 
3207 out:
3208 	put_trace_buf();
3209 
3210 out_nobuffer:
3211 	preempt_enable_notrace();
3212 	unpause_graph_tracing();
3213 
3214 	return len;
3215 }
3216 EXPORT_SYMBOL_GPL(trace_vbprintk);
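
/*
 * Illustrative sketch (editorial addition, not part of the original source):
 * callers do not invoke trace_vbprintk() directly; the trace_printk() macro
 * ends up here when the format string is a compile-time constant. Only the
 * format pointer and the binary arguments are stored, which keeps the fast
 * path cheap compared to formatting the full string up front.
 *
 *	trace_printk("read %d bytes from cpu %d\n", nbytes, cpu);
 */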
3217 
3218 __printf(3, 0)
3219 static int
3220 __trace_array_vprintk(struct ring_buffer *buffer,
3221 		      unsigned long ip, const char *fmt, va_list args)
3222 {
3223 	struct trace_event_call *call = &event_print;
3224 	struct ring_buffer_event *event;
3225 	int len = 0, size, pc;
3226 	struct print_entry *entry;
3227 	unsigned long flags;
3228 	char *tbuffer;
3229 
3230 	if (tracing_disabled || tracing_selftest_running)
3231 		return 0;
3232 
3233 	/* Don't pollute graph traces with trace_vprintk internals */
3234 	pause_graph_tracing();
3235 
3236 	pc = preempt_count();
3237 	preempt_disable_notrace();
3238 
3239 
3240 	tbuffer = get_trace_buf();
3241 	if (!tbuffer) {
3242 		len = 0;
3243 		goto out_nobuffer;
3244 	}
3245 
3246 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3247 
3248 	local_save_flags(flags);
3249 	size = sizeof(*entry) + len + 1;
3250 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3251 					    flags, pc);
3252 	if (!event)
3253 		goto out;
3254 	entry = ring_buffer_event_data(event);
3255 	entry->ip = ip;
3256 
3257 	memcpy(&entry->buf, tbuffer, len + 1);
3258 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3259 		__buffer_unlock_commit(buffer, event);
3260 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3261 	}
3262 
3263 out:
3264 	put_trace_buf();
3265 
3266 out_nobuffer:
3267 	preempt_enable_notrace();
3268 	unpause_graph_tracing();
3269 
3270 	return len;
3271 }
3272 
3273 __printf(3, 0)
3274 int trace_array_vprintk(struct trace_array *tr,
3275 			unsigned long ip, const char *fmt, va_list args)
3276 {
3277 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3278 }
3279 
3280 __printf(3, 0)
3281 int trace_array_printk(struct trace_array *tr,
3282 		       unsigned long ip, const char *fmt, ...)
3283 {
3284 	int ret;
3285 	va_list ap;
3286 
3287 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3288 		return 0;
3289 
3290 	if (!tr)
3291 		return -ENOENT;
3292 
3293 	va_start(ap, fmt);
3294 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3295 	va_end(ap);
3296 	return ret;
3297 }
3298 EXPORT_SYMBOL_GPL(trace_array_printk);
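
/*
 * Illustrative sketch (editorial addition, not part of the original source):
 * a kernel module with its own trace instance can log into that instance's
 * buffer instead of the global one. How the trace_array handle is obtained
 * is outside this file; trace_array_get_by_name() is one possibility where
 * that API is available, and "example"/"stage" below are hypothetical.
 *
 *	struct trace_array *tr = trace_array_get_by_name("example");
 *
 *	if (tr)
 *		trace_array_printk(tr, _THIS_IP_, "stage %d done\n", stage);
 */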
3299 
3300 __printf(3, 4)
3301 int trace_array_printk_buf(struct ring_buffer *buffer,
3302 			   unsigned long ip, const char *fmt, ...)
3303 {
3304 	int ret;
3305 	va_list ap;
3306 
3307 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3308 		return 0;
3309 
3310 	va_start(ap, fmt);
3311 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3312 	va_end(ap);
3313 	return ret;
3314 }
3315 
3316 __printf(2, 0)
3317 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3318 {
3319 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3320 }
3321 EXPORT_SYMBOL_GPL(trace_vprintk);
3322 
3323 static void trace_iterator_increment(struct trace_iterator *iter)
3324 {
3325 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3326 
3327 	iter->idx++;
3328 	if (buf_iter)
3329 		ring_buffer_read(buf_iter, NULL);
3330 }
3331 
3332 static struct trace_entry *
3333 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3334 		unsigned long *lost_events)
3335 {
3336 	struct ring_buffer_event *event;
3337 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3338 
3339 	if (buf_iter)
3340 		event = ring_buffer_iter_peek(buf_iter, ts);
3341 	else
3342 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3343 					 lost_events);
3344 
3345 	if (event) {
3346 		iter->ent_size = ring_buffer_event_length(event);
3347 		return ring_buffer_event_data(event);
3348 	}
3349 	iter->ent_size = 0;
3350 	return NULL;
3351 }
3352 
3353 static struct trace_entry *
3354 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3355 		  unsigned long *missing_events, u64 *ent_ts)
3356 {
3357 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3358 	struct trace_entry *ent, *next = NULL;
3359 	unsigned long lost_events = 0, next_lost = 0;
3360 	int cpu_file = iter->cpu_file;
3361 	u64 next_ts = 0, ts;
3362 	int next_cpu = -1;
3363 	int next_size = 0;
3364 	int cpu;
3365 
3366 	/*
3367 	 * If we are in a per_cpu trace file, don't bother iterating over
3368 	 * all CPUs; peek at that CPU directly.
3369 	 */
3370 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3371 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3372 			return NULL;
3373 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3374 		if (ent_cpu)
3375 			*ent_cpu = cpu_file;
3376 
3377 		return ent;
3378 	}
3379 
3380 	for_each_tracing_cpu(cpu) {
3381 
3382 		if (ring_buffer_empty_cpu(buffer, cpu))
3383 			continue;
3384 
3385 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3386 
3387 		/*
3388 		 * Pick the entry with the smallest timestamp:
3389 		 */
3390 		if (ent && (!next || ts < next_ts)) {
3391 			next = ent;
3392 			next_cpu = cpu;
3393 			next_ts = ts;
3394 			next_lost = lost_events;
3395 			next_size = iter->ent_size;
3396 		}
3397 	}
3398 
3399 	iter->ent_size = next_size;
3400 
3401 	if (ent_cpu)
3402 		*ent_cpu = next_cpu;
3403 
3404 	if (ent_ts)
3405 		*ent_ts = next_ts;
3406 
3407 	if (missing_events)
3408 		*missing_events = next_lost;
3409 
3410 	return next;
3411 }
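
/*
 * Worked example (editorial note, not in the original source): with three
 * non-empty per-cpu buffers whose next entries carry timestamps 100 (cpu0),
 * 90 (cpu1) and 110 (cpu2), the loop above returns cpu1's entry first.
 * Repeated calls from trace_find_next_entry_inc() therefore perform a k-way
 * merge of the per-cpu buffers, ordered by timestamp.
 */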
3412 
3413 /* Find the next real entry, without updating the iterator itself */
3414 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3415 					  int *ent_cpu, u64 *ent_ts)
3416 {
3417 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3418 }
3419 
3420 /* Find the next real entry, and increment the iterator to the next entry */
3421 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3422 {
3423 	iter->ent = __find_next_entry(iter, &iter->cpu,
3424 				      &iter->lost_events, &iter->ts);
3425 
3426 	if (iter->ent)
3427 		trace_iterator_increment(iter);
3428 
3429 	return iter->ent ? iter : NULL;
3430 }
3431 
3432 static void trace_consume(struct trace_iterator *iter)
3433 {
3434 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3435 			    &iter->lost_events);
3436 }
3437 
3438 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3439 {
3440 	struct trace_iterator *iter = m->private;
3441 	int i = (int)*pos;
3442 	void *ent;
3443 
3444 	WARN_ON_ONCE(iter->leftover);
3445 
3446 	(*pos)++;
3447 
3448 	/* can't go backwards */
3449 	if (iter->idx > i)
3450 		return NULL;
3451 
3452 	if (iter->idx < 0)
3453 		ent = trace_find_next_entry_inc(iter);
3454 	else
3455 		ent = iter;
3456 
3457 	while (ent && iter->idx < i)
3458 		ent = trace_find_next_entry_inc(iter);
3459 
3460 	iter->pos = *pos;
3461 
3462 	return ent;
3463 }
3464 
3465 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3466 {
3467 	struct ring_buffer_event *event;
3468 	struct ring_buffer_iter *buf_iter;
3469 	unsigned long entries = 0;
3470 	u64 ts;
3471 
3472 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3473 
3474 	buf_iter = trace_buffer_iter(iter, cpu);
3475 	if (!buf_iter)
3476 		return;
3477 
3478 	ring_buffer_iter_reset(buf_iter);
3479 
3480 	/*
3481 	 * We could have the case with the max latency tracers
3482 	 * that a reset never took place on a cpu. This is evident
3483 	 * by the timestamp being before the start of the buffer.
3484 	 */
3485 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3486 		if (ts >= iter->trace_buffer->time_start)
3487 			break;
3488 		entries++;
3489 		ring_buffer_read(buf_iter, NULL);
3490 	}
3491 
3492 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3493 }
3494 
3495 /*
3496  * The current tracer is copied to avoid taking a global lock
3497  * all around.
3498  */
3499 static void *s_start(struct seq_file *m, loff_t *pos)
3500 {
3501 	struct trace_iterator *iter = m->private;
3502 	struct trace_array *tr = iter->tr;
3503 	int cpu_file = iter->cpu_file;
3504 	void *p = NULL;
3505 	loff_t l = 0;
3506 	int cpu;
3507 
3508 	/*
3509 	 * copy the tracer to avoid using a global lock all around.
3510 	 * iter->trace is a copy of current_trace, the pointer to the
3511 	 * name may be used instead of a strcmp(), as iter->trace->name
3512 	 * will point to the same string as current_trace->name.
3513 	 */
3514 	mutex_lock(&trace_types_lock);
3515 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
3516 		/* Close iter->trace before switching to the new current tracer */
3517 		if (iter->trace->close)
3518 			iter->trace->close(iter);
3519 		*iter->trace = *tr->current_trace;
3520 		/* Reopen the new current tracer */
3521 		if (iter->trace->open)
3522 			iter->trace->open(iter);
3523 	}
3524 	mutex_unlock(&trace_types_lock);
3525 
3526 #ifdef CONFIG_TRACER_MAX_TRACE
3527 	if (iter->snapshot && iter->trace->use_max_tr)
3528 		return ERR_PTR(-EBUSY);
3529 #endif
3530 
3531 	if (*pos != iter->pos) {
3532 		iter->ent = NULL;
3533 		iter->cpu = 0;
3534 		iter->idx = -1;
3535 
3536 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3537 			for_each_tracing_cpu(cpu)
3538 				tracing_iter_reset(iter, cpu);
3539 		} else
3540 			tracing_iter_reset(iter, cpu_file);
3541 
3542 		iter->leftover = 0;
3543 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3544 			;
3545 
3546 	} else {
3547 		/*
3548 		 * If we overflowed the seq_file before, then we want
3549 		 * to just reuse the trace_seq buffer again.
3550 		 */
3551 		if (iter->leftover)
3552 			p = iter;
3553 		else {
3554 			l = *pos - 1;
3555 			p = s_next(m, p, &l);
3556 		}
3557 	}
3558 
3559 	trace_event_read_lock();
3560 	trace_access_lock(cpu_file);
3561 	return p;
3562 }
3563 
3564 static void s_stop(struct seq_file *m, void *p)
3565 {
3566 	struct trace_iterator *iter = m->private;
3567 
3568 #ifdef CONFIG_TRACER_MAX_TRACE
3569 	if (iter->snapshot && iter->trace->use_max_tr)
3570 		return;
3571 #endif
3572 
3573 	trace_access_unlock(iter->cpu_file);
3574 	trace_event_read_unlock();
3575 }
3576 
3577 static void
3578 get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total,
3579 		      unsigned long *entries, int cpu)
3580 {
3581 	unsigned long count;
3582 
3583 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3584 	/*
3585 	 * If this buffer has skipped entries, then we hold all
3586 	 * entries for the trace and we need to ignore the
3587 	 * ones before the time stamp.
3588 	 */
3589 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3590 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3591 		/* total is the same as the entries */
3592 		*total = count;
3593 	} else
3594 		*total = count +
3595 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3596 	*entries = count;
3597 }
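
/*
 * Worked example of the accounting above with made-up numbers;
 * example_entry_accounting() is purely illustrative and is not a
 * kernel helper.
 */
static inline void example_entry_accounting(void)
{
	unsigned long count   = 100;	/* entries still in the CPU buffer */
	unsigned long skipped = 10;	/* entries older than time_start   */
	unsigned long overrun = 25;	/* entries the writer overwrote    */
	unsigned long entries, total;

	if (skipped) {
		entries = count - skipped;	/* 90 readable entries       */
		total   = entries;		/* overruns are not added    */
	} else {
		entries = count;		/* 100 readable entries      */
		total   = count + overrun;	/* 125 entries ever written  */
	}
	(void)entries;
	(void)total;
}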
3598 
3599 static void
3600 get_total_entries(struct trace_buffer *buf,
3601 		  unsigned long *total, unsigned long *entries)
3602 {
3603 	unsigned long t, e;
3604 	int cpu;
3605 
3606 	*total = 0;
3607 	*entries = 0;
3608 
3609 	for_each_tracing_cpu(cpu) {
3610 		get_total_entries_cpu(buf, &t, &e, cpu);
3611 		*total += t;
3612 		*entries += e;
3613 	}
3614 }
3615 
3616 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3617 {
3618 	unsigned long total, entries;
3619 
3620 	if (!tr)
3621 		tr = &global_trace;
3622 
3623 	get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu);
3624 
3625 	return entries;
3626 }
3627 
3628 unsigned long trace_total_entries(struct trace_array *tr)
3629 {
3630 	unsigned long total, entries;
3631 
3632 	if (!tr)
3633 		tr = &global_trace;
3634 
3635 	get_total_entries(&tr->trace_buffer, &total, &entries);
3636 
3637 	return entries;
3638 }
3639 
3640 static void print_lat_help_header(struct seq_file *m)
3641 {
3642 	seq_puts(m, "#                    _------=> CPU#            \n"
3643 		    "#                   / _-----=> irqs-off        \n"
3644 		    "#                  | / _----=> need-resched    \n"
3645 		    "#                  || / _---=> hardirq/softirq \n"
3646 		    "#                  ||| / _--=> preempt-depth   \n"
3647 		    "#                  |||| /     delay            \n"
3648 		    "#  cmd     pid     ||||| time  |   caller      \n"
3649 		    "#     \\   /        |||||  \\    |   /         \n");
3650 }
3651 
3652 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3653 {
3654 	unsigned long total;
3655 	unsigned long entries;
3656 
3657 	get_total_entries(buf, &total, &entries);
3658 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3659 		   entries, total, num_online_cpus());
3660 	seq_puts(m, "#\n");
3661 }
3662 
3663 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3664 				   unsigned int flags)
3665 {
3666 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3667 
3668 	print_event_info(buf, m);
3669 
3670 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
3671 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
3672 }
3673 
3674 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3675 				       unsigned int flags)
3676 {
3677 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3678 	const char *space = "            ";
3679 	int prec = tgid ? 12 : 2;
3680 
3681 	print_event_info(buf, m);
3682 
3683 	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
3684 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
3685 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
3686 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
3687 	seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
3688 	seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
3689 	seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
3690 }
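
/*
 * A minimal sketch of the "%.*s" trick used above; the helper name and
 * the single header line it prints are illustrative only.  Printing at
 * most 'prec' characters of a blank string widens the line by exactly
 * the width of the optional TGID column, so one format string serves
 * both layouts.
 */
static inline void example_prec_padding(struct seq_file *m, bool tgid)
{
	const char *space = "            ";	/* twelve blanks */
	int prec = tgid ? 12 : 2;

	seq_printf(m, "#%.*s _-----=> irqs-off\n", prec, space);
}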
3691 
3692 void
3693 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3694 {
3695 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3696 	struct trace_buffer *buf = iter->trace_buffer;
3697 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3698 	struct tracer *type = iter->trace;
3699 	unsigned long entries;
3700 	unsigned long total;
3701 	const char *name = "preemption";
3702 
3703 	name = type->name;
3704 
3705 	get_total_entries(buf, &total, &entries);
3706 
3707 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3708 		   name, UTS_RELEASE);
3709 	seq_puts(m, "# -----------------------------------"
3710 		 "---------------------------------\n");
3711 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3712 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3713 		   nsecs_to_usecs(data->saved_latency),
3714 		   entries,
3715 		   total,
3716 		   buf->cpu,
3717 #if defined(CONFIG_PREEMPT_NONE)
3718 		   "server",
3719 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3720 		   "desktop",
3721 #elif defined(CONFIG_PREEMPT)
3722 		   "preempt",
3723 #else
3724 		   "unknown",
3725 #endif
3726 		   /* These are reserved for later use */
3727 		   0, 0, 0, 0);
3728 #ifdef CONFIG_SMP
3729 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3730 #else
3731 	seq_puts(m, ")\n");
3732 #endif
3733 	seq_puts(m, "#    -----------------\n");
3734 	seq_printf(m, "#    | task: %.16s-%d "
3735 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3736 		   data->comm, data->pid,
3737 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3738 		   data->policy, data->rt_priority);
3739 	seq_puts(m, "#    -----------------\n");
3740 
3741 	if (data->critical_start) {
3742 		seq_puts(m, "#  => started at: ");
3743 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3744 		trace_print_seq(m, &iter->seq);
3745 		seq_puts(m, "\n#  => ended at:   ");
3746 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3747 		trace_print_seq(m, &iter->seq);
3748 		seq_puts(m, "\n#\n");
3749 	}
3750 
3751 	seq_puts(m, "#\n");
3752 }
3753 
3754 static void test_cpu_buff_start(struct trace_iterator *iter)
3755 {
3756 	struct trace_seq *s = &iter->seq;
3757 	struct trace_array *tr = iter->tr;
3758 
3759 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3760 		return;
3761 
3762 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3763 		return;
3764 
3765 	if (cpumask_available(iter->started) &&
3766 	    cpumask_test_cpu(iter->cpu, iter->started))
3767 		return;
3768 
3769 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3770 		return;
3771 
3772 	if (cpumask_available(iter->started))
3773 		cpumask_set_cpu(iter->cpu, iter->started);
3774 
3775 	/* Don't print started cpu buffer for the first entry of the trace */
3776 	if (iter->idx > 1)
3777 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3778 				iter->cpu);
3779 }
3780 
3781 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3782 {
3783 	struct trace_array *tr = iter->tr;
3784 	struct trace_seq *s = &iter->seq;
3785 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3786 	struct trace_entry *entry;
3787 	struct trace_event *event;
3788 
3789 	entry = iter->ent;
3790 
3791 	test_cpu_buff_start(iter);
3792 
3793 	event = ftrace_find_event(entry->type);
3794 
3795 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3796 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3797 			trace_print_lat_context(iter);
3798 		else
3799 			trace_print_context(iter);
3800 	}
3801 
3802 	if (trace_seq_has_overflowed(s))
3803 		return TRACE_TYPE_PARTIAL_LINE;
3804 
3805 	if (event)
3806 		return event->funcs->trace(iter, sym_flags, event);
3807 
3808 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3809 
3810 	return trace_handle_return(s);
3811 }
3812 
3813 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3814 {
3815 	struct trace_array *tr = iter->tr;
3816 	struct trace_seq *s = &iter->seq;
3817 	struct trace_entry *entry;
3818 	struct trace_event *event;
3819 
3820 	entry = iter->ent;
3821 
3822 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3823 		trace_seq_printf(s, "%d %d %llu ",
3824 				 entry->pid, iter->cpu, iter->ts);
3825 
3826 	if (trace_seq_has_overflowed(s))
3827 		return TRACE_TYPE_PARTIAL_LINE;
3828 
3829 	event = ftrace_find_event(entry->type);
3830 	if (event)
3831 		return event->funcs->raw(iter, 0, event);
3832 
3833 	trace_seq_printf(s, "%d ?\n", entry->type);
3834 
3835 	return trace_handle_return(s);
3836 }
3837 
3838 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3839 {
3840 	struct trace_array *tr = iter->tr;
3841 	struct trace_seq *s = &iter->seq;
3842 	unsigned char newline = '\n';
3843 	struct trace_entry *entry;
3844 	struct trace_event *event;
3845 
3846 	entry = iter->ent;
3847 
3848 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3849 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3850 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3851 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3852 		if (trace_seq_has_overflowed(s))
3853 			return TRACE_TYPE_PARTIAL_LINE;
3854 	}
3855 
3856 	event = ftrace_find_event(entry->type);
3857 	if (event) {
3858 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3859 		if (ret != TRACE_TYPE_HANDLED)
3860 			return ret;
3861 	}
3862 
3863 	SEQ_PUT_FIELD(s, newline);
3864 
3865 	return trace_handle_return(s);
3866 }
3867 
3868 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3869 {
3870 	struct trace_array *tr = iter->tr;
3871 	struct trace_seq *s = &iter->seq;
3872 	struct trace_entry *entry;
3873 	struct trace_event *event;
3874 
3875 	entry = iter->ent;
3876 
3877 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3878 		SEQ_PUT_FIELD(s, entry->pid);
3879 		SEQ_PUT_FIELD(s, iter->cpu);
3880 		SEQ_PUT_FIELD(s, iter->ts);
3881 		if (trace_seq_has_overflowed(s))
3882 			return TRACE_TYPE_PARTIAL_LINE;
3883 	}
3884 
3885 	event = ftrace_find_event(entry->type);
3886 	return event ? event->funcs->binary(iter, 0, event) :
3887 		TRACE_TYPE_HANDLED;
3888 }
3889 
3890 int trace_empty(struct trace_iterator *iter)
3891 {
3892 	struct ring_buffer_iter *buf_iter;
3893 	int cpu;
3894 
3895 	/* If we are looking at one CPU buffer, only check that one */
3896 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3897 		cpu = iter->cpu_file;
3898 		buf_iter = trace_buffer_iter(iter, cpu);
3899 		if (buf_iter) {
3900 			if (!ring_buffer_iter_empty(buf_iter))
3901 				return 0;
3902 		} else {
3903 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3904 				return 0;
3905 		}
3906 		return 1;
3907 	}
3908 
3909 	for_each_tracing_cpu(cpu) {
3910 		buf_iter = trace_buffer_iter(iter, cpu);
3911 		if (buf_iter) {
3912 			if (!ring_buffer_iter_empty(buf_iter))
3913 				return 0;
3914 		} else {
3915 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3916 				return 0;
3917 		}
3918 	}
3919 
3920 	return 1;
3921 }
3922 
3923 /*  Called with trace_event_read_lock() held. */
3924 enum print_line_t print_trace_line(struct trace_iterator *iter)
3925 {
3926 	struct trace_array *tr = iter->tr;
3927 	unsigned long trace_flags = tr->trace_flags;
3928 	enum print_line_t ret;
3929 
3930 	if (iter->lost_events) {
3931 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3932 				 iter->cpu, iter->lost_events);
3933 		if (trace_seq_has_overflowed(&iter->seq))
3934 			return TRACE_TYPE_PARTIAL_LINE;
3935 	}
3936 
3937 	if (iter->trace && iter->trace->print_line) {
3938 		ret = iter->trace->print_line(iter);
3939 		if (ret != TRACE_TYPE_UNHANDLED)
3940 			return ret;
3941 	}
3942 
3943 	if (iter->ent->type == TRACE_BPUTS &&
3944 			trace_flags & TRACE_ITER_PRINTK &&
3945 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3946 		return trace_print_bputs_msg_only(iter);
3947 
3948 	if (iter->ent->type == TRACE_BPRINT &&
3949 			trace_flags & TRACE_ITER_PRINTK &&
3950 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3951 		return trace_print_bprintk_msg_only(iter);
3952 
3953 	if (iter->ent->type == TRACE_PRINT &&
3954 			trace_flags & TRACE_ITER_PRINTK &&
3955 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3956 		return trace_print_printk_msg_only(iter);
3957 
3958 	if (trace_flags & TRACE_ITER_BIN)
3959 		return print_bin_fmt(iter);
3960 
3961 	if (trace_flags & TRACE_ITER_HEX)
3962 		return print_hex_fmt(iter);
3963 
3964 	if (trace_flags & TRACE_ITER_RAW)
3965 		return print_raw_fmt(iter);
3966 
3967 	return print_trace_fmt(iter);
3968 }
3969 
3970 void trace_latency_header(struct seq_file *m)
3971 {
3972 	struct trace_iterator *iter = m->private;
3973 	struct trace_array *tr = iter->tr;
3974 
3975 	/* print nothing if the buffers are empty */
3976 	if (trace_empty(iter))
3977 		return;
3978 
3979 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3980 		print_trace_header(m, iter);
3981 
3982 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3983 		print_lat_help_header(m);
3984 }
3985 
3986 void trace_default_header(struct seq_file *m)
3987 {
3988 	struct trace_iterator *iter = m->private;
3989 	struct trace_array *tr = iter->tr;
3990 	unsigned long trace_flags = tr->trace_flags;
3991 
3992 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3993 		return;
3994 
3995 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3996 		/* print nothing if the buffers are empty */
3997 		if (trace_empty(iter))
3998 			return;
3999 		print_trace_header(m, iter);
4000 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4001 			print_lat_help_header(m);
4002 	} else {
4003 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4004 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4005 				print_func_help_header_irq(iter->trace_buffer,
4006 							   m, trace_flags);
4007 			else
4008 				print_func_help_header(iter->trace_buffer, m,
4009 						       trace_flags);
4010 		}
4011 	}
4012 }
4013 
4014 static void test_ftrace_alive(struct seq_file *m)
4015 {
4016 	if (!ftrace_is_dead())
4017 		return;
4018 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4019 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4020 }
4021 
4022 #ifdef CONFIG_TRACER_MAX_TRACE
4023 static void show_snapshot_main_help(struct seq_file *m)
4024 {
4025 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4026 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4027 		    "#                      Takes a snapshot of the main buffer.\n"
4028 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4029 		    "#                      (Doesn't have to be '2' works with any number that\n"
4030 		    "#                       is not a '0' or '1')\n");
4031 }
4032 
4033 static void show_snapshot_percpu_help(struct seq_file *m)
4034 {
4035 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4036 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4037 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4038 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4039 #else
4040 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4041 		    "#                     Must use main snapshot file to allocate.\n");
4042 #endif
4043 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4044 		    "#                      (Doesn't have to be '2' works with any number that\n"
4045 		    "#                       is not a '0' or '1')\n");
4046 }
4047 
4048 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4049 {
4050 	if (iter->tr->allocated_snapshot)
4051 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4052 	else
4053 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4054 
4055 	seq_puts(m, "# Snapshot commands:\n");
4056 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4057 		show_snapshot_main_help(m);
4058 	else
4059 		show_snapshot_percpu_help(m);
4060 }
4061 #else
4062 /* Should never be called */
4063 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4064 #endif
4065 
4066 static int s_show(struct seq_file *m, void *v)
4067 {
4068 	struct trace_iterator *iter = v;
4069 	int ret;
4070 
4071 	if (iter->ent == NULL) {
4072 		if (iter->tr) {
4073 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4074 			seq_puts(m, "#\n");
4075 			test_ftrace_alive(m);
4076 		}
4077 		if (iter->snapshot && trace_empty(iter))
4078 			print_snapshot_help(m, iter);
4079 		else if (iter->trace && iter->trace->print_header)
4080 			iter->trace->print_header(m);
4081 		else
4082 			trace_default_header(m);
4083 
4084 	} else if (iter->leftover) {
4085 		/*
4086 		 * If we filled the seq_file buffer earlier, we
4087 		 * want to just show it now.
4088 		 */
4089 		ret = trace_print_seq(m, &iter->seq);
4090 
4091 		/* ret should this time be zero, but you never know */
4092 		iter->leftover = ret;
4093 
4094 	} else {
4095 		ret = print_trace_line(iter);
4096 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4097 			iter->seq.full = 0;
4098 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4099 		}
4100 		ret = trace_print_seq(m, &iter->seq);
4101 		/*
4102 		 * If we overflow the seq_file buffer, then it will
4103 		 * ask us for this data again at start up.
4104 		 * Use that instead.
4105 		 *  ret is 0 if seq_file write succeeded.
4106 		 *        -1 otherwise.
4107 		 */
4108 		iter->leftover = ret;
4109 	}
4110 
4111 	return 0;
4112 }
4113 
4114 /*
4115  * Should be used after trace_array_get(), trace_types_lock
4116  * ensures that i_cdev was already initialized.
4117  */
4118 static inline int tracing_get_cpu(struct inode *inode)
4119 {
4120 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4121 		return (long)inode->i_cdev - 1;
4122 	return RING_BUFFER_ALL_CPUS;
4123 }
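
/*
 * Sketch of the encoding tracing_get_cpu() relies on; the helper below
 * is illustrative only, the real setup lives in trace_create_cpu_file().
 * Storing cpu + 1 keeps a NULL (zero) i_cdev meaning RING_BUFFER_ALL_CPUS.
 */
static inline void example_encode_cpu(struct inode *inode, long cpu)
{
	inode->i_cdev = (void *)(cpu + 1);	/* 0 stays reserved for "all" */
}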
4124 
4125 static const struct seq_operations tracer_seq_ops = {
4126 	.start		= s_start,
4127 	.next		= s_next,
4128 	.stop		= s_stop,
4129 	.show		= s_show,
4130 };
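
/*
 * The four callbacks above follow the usual seq_file contract:
 * ->start() positions the iterator for *pos (and takes the needed
 * locks), ->show() emits one record, ->next() advances, ->stop()
 * releases the locks.  A minimal, self-contained illustration of the
 * same contract; every name below is hypothetical and the iterator is
 * not wired to any file.
 */
static int example_vals[] = { 1, 2, 3 };

static void *example_start_op(struct seq_file *m, loff_t *pos)
{
	return *pos < ARRAY_SIZE(example_vals) ? &example_vals[*pos] : NULL;
}

static void *example_next_op(struct seq_file *m, void *v, loff_t *pos)
{
	(*pos)++;
	return example_start_op(m, pos);
}

static void example_stop_op(struct seq_file *m, void *v)
{
}

static int example_show_op(struct seq_file *m, void *v)
{
	seq_printf(m, "%d\n", *(int *)v);
	return 0;
}

static const struct seq_operations example_seq_ops = {
	.start	= example_start_op,
	.next	= example_next_op,
	.stop	= example_stop_op,
	.show	= example_show_op,
};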
4131 
4132 static struct trace_iterator *
4133 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4134 {
4135 	struct trace_array *tr = inode->i_private;
4136 	struct trace_iterator *iter;
4137 	int cpu;
4138 
4139 	if (tracing_disabled)
4140 		return ERR_PTR(-ENODEV);
4141 
4142 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4143 	if (!iter)
4144 		return ERR_PTR(-ENOMEM);
4145 
4146 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4147 				    GFP_KERNEL);
4148 	if (!iter->buffer_iter)
4149 		goto release;
4150 
4151 	/*
4152 	 * We make a copy of the current tracer to avoid concurrent
4153 	 * changes on it while we are reading.
4154 	 */
4155 	mutex_lock(&trace_types_lock);
4156 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4157 	if (!iter->trace)
4158 		goto fail;
4159 
4160 	*iter->trace = *tr->current_trace;
4161 
4162 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4163 		goto fail;
4164 
4165 	iter->tr = tr;
4166 
4167 #ifdef CONFIG_TRACER_MAX_TRACE
4168 	/* Currently only the top directory has a snapshot */
4169 	if (tr->current_trace->print_max || snapshot)
4170 		iter->trace_buffer = &tr->max_buffer;
4171 	else
4172 #endif
4173 		iter->trace_buffer = &tr->trace_buffer;
4174 	iter->snapshot = snapshot;
4175 	iter->pos = -1;
4176 	iter->cpu_file = tracing_get_cpu(inode);
4177 	mutex_init(&iter->mutex);
4178 
4179 	/* Notify the tracer early; before we stop tracing. */
4180 	if (iter->trace && iter->trace->open)
4181 		iter->trace->open(iter);
4182 
4183 	/* Annotate start of buffers if we had overruns */
4184 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
4185 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4186 
4187 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4188 	if (trace_clocks[tr->clock_id].in_ns)
4189 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4190 
4191 	/* stop the trace while dumping if we are not opening "snapshot" */
4192 	if (!iter->snapshot)
4193 		tracing_stop_tr(tr);
4194 
4195 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4196 		for_each_tracing_cpu(cpu) {
4197 			iter->buffer_iter[cpu] =
4198 				ring_buffer_read_prepare(iter->trace_buffer->buffer,
4199 							 cpu, GFP_KERNEL);
4200 		}
4201 		ring_buffer_read_prepare_sync();
4202 		for_each_tracing_cpu(cpu) {
4203 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4204 			tracing_iter_reset(iter, cpu);
4205 		}
4206 	} else {
4207 		cpu = iter->cpu_file;
4208 		iter->buffer_iter[cpu] =
4209 			ring_buffer_read_prepare(iter->trace_buffer->buffer,
4210 						 cpu, GFP_KERNEL);
4211 		ring_buffer_read_prepare_sync();
4212 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4213 		tracing_iter_reset(iter, cpu);
4214 	}
4215 
4216 	mutex_unlock(&trace_types_lock);
4217 
4218 	return iter;
4219 
4220  fail:
4221 	mutex_unlock(&trace_types_lock);
4222 	kfree(iter->trace);
4223 	kfree(iter->buffer_iter);
4224 release:
4225 	seq_release_private(inode, file);
4226 	return ERR_PTR(-ENOMEM);
4227 }
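
/*
 * Condensed sketch of the read-iterator handshake used above, reduced
 * to a single CPU; 'buffer' and 'cpu' are placeholders and error
 * handling is trimmed.  prepare() allocates the iterator, a single
 * prepare_sync() then covers all prepared iterators, and read_start()
 * finally lets the iterator walk the buffer.
 */
static struct ring_buffer_iter *
example_open_one_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_iter *it;

	it = ring_buffer_read_prepare(buffer, cpu, GFP_KERNEL);
	if (!it)
		return NULL;
	ring_buffer_read_prepare_sync();
	ring_buffer_read_start(it);
	return it;
}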
4228 
4229 int tracing_open_generic(struct inode *inode, struct file *filp)
4230 {
4231 	int ret;
4232 
4233 	ret = tracing_check_open_get_tr(NULL);
4234 	if (ret)
4235 		return ret;
4236 
4237 	filp->private_data = inode->i_private;
4238 	return 0;
4239 }
4240 
4241 bool tracing_is_disabled(void)
4242 {
4243 	return (tracing_disabled) ? true : false;
4244 }
4245 
4246 /*
4247  * Open and update trace_array ref count.
4248  * Must have the current trace_array passed to it.
4249  */
4250 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4251 {
4252 	struct trace_array *tr = inode->i_private;
4253 	int ret;
4254 
4255 	ret = tracing_check_open_get_tr(tr);
4256 	if (ret)
4257 		return ret;
4258 
4259 	filp->private_data = inode->i_private;
4260 
4261 	return 0;
4262 }
4263 
4264 /*
4265  * The private pointer of the inode is the trace_event_file.
4266  * Update the tr ref count associated to it.
4267  */
4268 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4269 {
4270 	struct trace_event_file *file = inode->i_private;
4271 	int ret;
4272 
4273 	ret = tracing_check_open_get_tr(file->tr);
4274 	if (ret)
4275 		return ret;
4276 
4277 	filp->private_data = inode->i_private;
4278 
4279 	return 0;
4280 }
4281 
4282 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4283 {
4284 	struct trace_event_file *file = inode->i_private;
4285 
4286 	trace_array_put(file->tr);
4287 
4288 	return 0;
4289 }
4290 
4291 static int tracing_release(struct inode *inode, struct file *file)
4292 {
4293 	struct trace_array *tr = inode->i_private;
4294 	struct seq_file *m = file->private_data;
4295 	struct trace_iterator *iter;
4296 	int cpu;
4297 
4298 	if (!(file->f_mode & FMODE_READ)) {
4299 		trace_array_put(tr);
4300 		return 0;
4301 	}
4302 
4303 	/* Writes do not use seq_file */
4304 	iter = m->private;
4305 	mutex_lock(&trace_types_lock);
4306 
4307 	for_each_tracing_cpu(cpu) {
4308 		if (iter->buffer_iter[cpu])
4309 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4310 	}
4311 
4312 	if (iter->trace && iter->trace->close)
4313 		iter->trace->close(iter);
4314 
4315 	if (!iter->snapshot)
4316 		/* reenable tracing if it was previously enabled */
4317 		tracing_start_tr(tr);
4318 
4319 	__trace_array_put(tr);
4320 
4321 	mutex_unlock(&trace_types_lock);
4322 
4323 	mutex_destroy(&iter->mutex);
4324 	free_cpumask_var(iter->started);
4325 	kfree(iter->trace);
4326 	kfree(iter->buffer_iter);
4327 	seq_release_private(inode, file);
4328 
4329 	return 0;
4330 }
4331 
4332 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4333 {
4334 	struct trace_array *tr = inode->i_private;
4335 
4336 	trace_array_put(tr);
4337 	return 0;
4338 }
4339 
4340 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4341 {
4342 	struct trace_array *tr = inode->i_private;
4343 
4344 	trace_array_put(tr);
4345 
4346 	return single_release(inode, file);
4347 }
4348 
4349 static int tracing_open(struct inode *inode, struct file *file)
4350 {
4351 	struct trace_array *tr = inode->i_private;
4352 	struct trace_iterator *iter;
4353 	int ret;
4354 
4355 	ret = tracing_check_open_get_tr(tr);
4356 	if (ret)
4357 		return ret;
4358 
4359 	/* If this file was open for write, then erase contents */
4360 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4361 		int cpu = tracing_get_cpu(inode);
4362 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4363 
4364 #ifdef CONFIG_TRACER_MAX_TRACE
4365 		if (tr->current_trace->print_max)
4366 			trace_buf = &tr->max_buffer;
4367 #endif
4368 
4369 		if (cpu == RING_BUFFER_ALL_CPUS)
4370 			tracing_reset_online_cpus(trace_buf);
4371 		else
4372 			tracing_reset_cpu(trace_buf, cpu);
4373 	}
4374 
4375 	if (file->f_mode & FMODE_READ) {
4376 		iter = __tracing_open(inode, file, false);
4377 		if (IS_ERR(iter))
4378 			ret = PTR_ERR(iter);
4379 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4380 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4381 	}
4382 
4383 	if (ret < 0)
4384 		trace_array_put(tr);
4385 
4386 	return ret;
4387 }
4388 
4389 /*
4390  * Some tracers are not suitable for instance buffers.
4391  * A tracer is always available for the global array (toplevel)
4392  * or if it explicitly states that it is.
4393  */
4394 static bool
4395 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4396 {
4397 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4398 }
4399 
4400 /* Find the next tracer that this trace array may use */
4401 static struct tracer *
4402 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4403 {
4404 	while (t && !trace_ok_for_array(t, tr))
4405 		t = t->next;
4406 
4407 	return t;
4408 }
4409 
4410 static void *
4411 t_next(struct seq_file *m, void *v, loff_t *pos)
4412 {
4413 	struct trace_array *tr = m->private;
4414 	struct tracer *t = v;
4415 
4416 	(*pos)++;
4417 
4418 	if (t)
4419 		t = get_tracer_for_array(tr, t->next);
4420 
4421 	return t;
4422 }
4423 
4424 static void *t_start(struct seq_file *m, loff_t *pos)
4425 {
4426 	struct trace_array *tr = m->private;
4427 	struct tracer *t;
4428 	loff_t l = 0;
4429 
4430 	mutex_lock(&trace_types_lock);
4431 
4432 	t = get_tracer_for_array(tr, trace_types);
4433 	for (; t && l < *pos; t = t_next(m, t, &l))
4434 			;
4435 
4436 	return t;
4437 }
4438 
4439 static void t_stop(struct seq_file *m, void *p)
4440 {
4441 	mutex_unlock(&trace_types_lock);
4442 }
4443 
4444 static int t_show(struct seq_file *m, void *v)
4445 {
4446 	struct tracer *t = v;
4447 
4448 	if (!t)
4449 		return 0;
4450 
4451 	seq_puts(m, t->name);
4452 	if (t->next)
4453 		seq_putc(m, ' ');
4454 	else
4455 		seq_putc(m, '\n');
4456 
4457 	return 0;
4458 }
4459 
4460 static const struct seq_operations show_traces_seq_ops = {
4461 	.start		= t_start,
4462 	.next		= t_next,
4463 	.stop		= t_stop,
4464 	.show		= t_show,
4465 };
4466 
4467 static int show_traces_open(struct inode *inode, struct file *file)
4468 {
4469 	struct trace_array *tr = inode->i_private;
4470 	struct seq_file *m;
4471 	int ret;
4472 
4473 	ret = tracing_check_open_get_tr(tr);
4474 	if (ret)
4475 		return ret;
4476 
4477 	ret = seq_open(file, &show_traces_seq_ops);
4478 	if (ret) {
4479 		trace_array_put(tr);
4480 		return ret;
4481 	}
4482 
4483 	m = file->private_data;
4484 	m->private = tr;
4485 
4486 	return 0;
4487 }
4488 
4489 static int show_traces_release(struct inode *inode, struct file *file)
4490 {
4491 	struct trace_array *tr = inode->i_private;
4492 
4493 	trace_array_put(tr);
4494 	return seq_release(inode, file);
4495 }
4496 
4497 static ssize_t
4498 tracing_write_stub(struct file *filp, const char __user *ubuf,
4499 		   size_t count, loff_t *ppos)
4500 {
4501 	return count;
4502 }
4503 
4504 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4505 {
4506 	int ret;
4507 
4508 	if (file->f_mode & FMODE_READ)
4509 		ret = seq_lseek(file, offset, whence);
4510 	else
4511 		file->f_pos = ret = 0;
4512 
4513 	return ret;
4514 }
4515 
4516 static const struct file_operations tracing_fops = {
4517 	.open		= tracing_open,
4518 	.read		= seq_read,
4519 	.write		= tracing_write_stub,
4520 	.llseek		= tracing_lseek,
4521 	.release	= tracing_release,
4522 };
4523 
4524 static const struct file_operations show_traces_fops = {
4525 	.open		= show_traces_open,
4526 	.read		= seq_read,
4527 	.llseek		= seq_lseek,
4528 	.release	= show_traces_release,
4529 };
4530 
4531 static ssize_t
4532 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4533 		     size_t count, loff_t *ppos)
4534 {
4535 	struct trace_array *tr = file_inode(filp)->i_private;
4536 	char *mask_str;
4537 	int len;
4538 
4539 	len = snprintf(NULL, 0, "%*pb\n",
4540 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4541 	mask_str = kmalloc(len, GFP_KERNEL);
4542 	if (!mask_str)
4543 		return -ENOMEM;
4544 
4545 	len = snprintf(mask_str, len, "%*pb\n",
4546 		       cpumask_pr_args(tr->tracing_cpumask));
4547 	if (len >= count) {
4548 		count = -EINVAL;
4549 		goto out_err;
4550 	}
4551 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4552 
4553 out_err:
4554 	kfree(mask_str);
4555 
4556 	return count;
4557 }
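
/*
 * The read above uses the two-pass snprintf idiom: snprintf(NULL, 0, ...)
 * only reports how long the output would be, so the buffer can be sized
 * exactly before the second, real format (kasprintf() wraps the same
 * idea).  A generic sketch with an arbitrary value; the helper name is
 * hypothetical.
 */
static inline char *example_format_ulong(unsigned long val)
{
	int len = snprintf(NULL, 0, "%lu\n", val) + 1;	/* + trailing NUL */
	char *buf = kmalloc(len, GFP_KERNEL);

	if (buf)
		snprintf(buf, len, "%lu\n", val);
	return buf;
}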
4558 
4559 static ssize_t
4560 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4561 		      size_t count, loff_t *ppos)
4562 {
4563 	struct trace_array *tr = file_inode(filp)->i_private;
4564 	cpumask_var_t tracing_cpumask_new;
4565 	int err, cpu;
4566 
4567 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4568 		return -ENOMEM;
4569 
4570 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4571 	if (err)
4572 		goto err_unlock;
4573 
4574 	local_irq_disable();
4575 	arch_spin_lock(&tr->max_lock);
4576 	for_each_tracing_cpu(cpu) {
4577 		/*
4578 		 * Increase/decrease the disabled counter if we are
4579 		 * about to flip a bit in the cpumask:
4580 		 */
4581 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4582 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4583 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4584 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4585 		}
4586 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4587 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4588 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4589 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4590 		}
4591 	}
4592 	arch_spin_unlock(&tr->max_lock);
4593 	local_irq_enable();
4594 
4595 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4596 	free_cpumask_var(tracing_cpumask_new);
4597 
4598 	return count;
4599 
4600 err_unlock:
4601 	free_cpumask_var(tracing_cpumask_new);
4602 
4603 	return err;
4604 }
4605 
4606 static const struct file_operations tracing_cpumask_fops = {
4607 	.open		= tracing_open_generic_tr,
4608 	.read		= tracing_cpumask_read,
4609 	.write		= tracing_cpumask_write,
4610 	.release	= tracing_release_generic_tr,
4611 	.llseek		= generic_file_llseek,
4612 };
4613 
4614 static int tracing_trace_options_show(struct seq_file *m, void *v)
4615 {
4616 	struct tracer_opt *trace_opts;
4617 	struct trace_array *tr = m->private;
4618 	u32 tracer_flags;
4619 	int i;
4620 
4621 	mutex_lock(&trace_types_lock);
4622 	tracer_flags = tr->current_trace->flags->val;
4623 	trace_opts = tr->current_trace->flags->opts;
4624 
4625 	for (i = 0; trace_options[i]; i++) {
4626 		if (tr->trace_flags & (1 << i))
4627 			seq_printf(m, "%s\n", trace_options[i]);
4628 		else
4629 			seq_printf(m, "no%s\n", trace_options[i]);
4630 	}
4631 
4632 	for (i = 0; trace_opts[i].name; i++) {
4633 		if (tracer_flags & trace_opts[i].bit)
4634 			seq_printf(m, "%s\n", trace_opts[i].name);
4635 		else
4636 			seq_printf(m, "no%s\n", trace_opts[i].name);
4637 	}
4638 	mutex_unlock(&trace_types_lock);
4639 
4640 	return 0;
4641 }
4642 
4643 static int __set_tracer_option(struct trace_array *tr,
4644 			       struct tracer_flags *tracer_flags,
4645 			       struct tracer_opt *opts, int neg)
4646 {
4647 	struct tracer *trace = tracer_flags->trace;
4648 	int ret;
4649 
4650 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4651 	if (ret)
4652 		return ret;
4653 
4654 	if (neg)
4655 		tracer_flags->val &= ~opts->bit;
4656 	else
4657 		tracer_flags->val |= opts->bit;
4658 	return 0;
4659 }
4660 
4661 /* Try to assign a tracer specific option */
4662 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4663 {
4664 	struct tracer *trace = tr->current_trace;
4665 	struct tracer_flags *tracer_flags = trace->flags;
4666 	struct tracer_opt *opts = NULL;
4667 	int i;
4668 
4669 	for (i = 0; tracer_flags->opts[i].name; i++) {
4670 		opts = &tracer_flags->opts[i];
4671 
4672 		if (strcmp(cmp, opts->name) == 0)
4673 			return __set_tracer_option(tr, trace->flags, opts, neg);
4674 	}
4675 
4676 	return -EINVAL;
4677 }
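
/*
 * set_tracer_option() walks the current tracer's flags->opts table by
 * name.  A hypothetical tracer could expose such a table as sketched
 * below; none of these names exist in the kernel, they only show the
 * shape __set_tracer_option() expects (NULL-named terminator included).
 */
#define EXAMPLE_OPT_VERBOSE	0x1
#define EXAMPLE_OPT_RAWADDR	0x2

static struct tracer_opt example_tracer_opts[] = {
	{ TRACER_OPT(verbose, EXAMPLE_OPT_VERBOSE) },
	{ TRACER_OPT(rawaddr, EXAMPLE_OPT_RAWADDR) },
	{ } /* terminator: opts[i].name == NULL ends the search loop */
};

static struct tracer_flags example_tracer_flags = {
	.val  = 0,
	.opts = example_tracer_opts,
};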
4678 
4679 /* Some tracers require overwrite to stay enabled */
4680 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4681 {
4682 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4683 		return -1;
4684 
4685 	return 0;
4686 }
4687 
4688 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4689 {
4690 	int *map;
4691 
4692 	if ((mask == TRACE_ITER_RECORD_TGID) ||
4693 	    (mask == TRACE_ITER_RECORD_CMD))
4694 		lockdep_assert_held(&event_mutex);
4695 
4696 	/* do nothing if flag is already set */
4697 	if (!!(tr->trace_flags & mask) == !!enabled)
4698 		return 0;
4699 
4700 	/* Give the tracer a chance to approve the change */
4701 	if (tr->current_trace->flag_changed)
4702 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4703 			return -EINVAL;
4704 
4705 	if (enabled)
4706 		tr->trace_flags |= mask;
4707 	else
4708 		tr->trace_flags &= ~mask;
4709 
4710 	if (mask == TRACE_ITER_RECORD_CMD)
4711 		trace_event_enable_cmd_record(enabled);
4712 
4713 	if (mask == TRACE_ITER_RECORD_TGID) {
4714 		if (!tgid_map) {
4715 			tgid_map_max = pid_max;
4716 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
4717 				       GFP_KERNEL);
4718 
4719 			/*
4720 			 * Pairs with smp_load_acquire() in
4721 			 * trace_find_tgid_ptr() to ensure that if it observes
4722 			 * the tgid_map we just allocated then it also observes
4723 			 * the corresponding tgid_map_max value.
4724 			 */
4725 			smp_store_release(&tgid_map, map);
4726 		}
4727 		if (!tgid_map) {
4728 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4729 			return -ENOMEM;
4730 		}
4731 
4732 		trace_event_enable_tgid_record(enabled);
4733 	}
4734 
4735 	if (mask == TRACE_ITER_EVENT_FORK)
4736 		trace_event_follow_fork(tr, enabled);
4737 
4738 	if (mask == TRACE_ITER_FUNC_FORK)
4739 		ftrace_pid_follow_fork(tr, enabled);
4740 
4741 	if (mask == TRACE_ITER_OVERWRITE) {
4742 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4743 #ifdef CONFIG_TRACER_MAX_TRACE
4744 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4745 #endif
4746 	}
4747 
4748 	if (mask == TRACE_ITER_PRINTK) {
4749 		trace_printk_start_stop_comm(enabled);
4750 		trace_printk_control(enabled);
4751 	}
4752 
4753 	return 0;
4754 }
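
/*
 * Condensed sketch of the reader side the smp_store_release() above
 * pairs with; the in-tree lookup is trace_find_tgid_ptr(), this copy is
 * only an illustration.  The acquire guarantees that observing a
 * non-NULL map also means observing the matching tgid_map_max.
 */
static int *example_find_tgid_ptr(int pid)
{
	int *map = smp_load_acquire(&tgid_map);

	if (unlikely(!map || pid > tgid_map_max))
		return NULL;

	return &map[pid];
}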
4755 
4756 static int trace_set_options(struct trace_array *tr, char *option)
4757 {
4758 	char *cmp;
4759 	int neg = 0;
4760 	int ret;
4761 	size_t orig_len = strlen(option);
4762 	int len;
4763 
4764 	cmp = strstrip(option);
4765 
4766 	len = str_has_prefix(cmp, "no");
4767 	if (len)
4768 		neg = 1;
4769 
4770 	cmp += len;
4771 
4772 	mutex_lock(&event_mutex);
4773 	mutex_lock(&trace_types_lock);
4774 
4775 	ret = match_string(trace_options, -1, cmp);
4776 	/* If no option could be set, test the specific tracer options */
4777 	if (ret < 0)
4778 		ret = set_tracer_option(tr, cmp, neg);
4779 	else
4780 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4781 
4782 	mutex_unlock(&trace_types_lock);
4783 	mutex_unlock(&event_mutex);
4784 
4785 	/*
4786 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4787 	 * turn it back into a space.
4788 	 */
4789 	if (orig_len > strlen(option))
4790 		option[strlen(option)] = ' ';
4791 
4792 	return ret;
4793 }
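
/*
 * A small illustration of the parsing above; "print-parent" is a real
 * core option, the helper itself is hypothetical.  "  noprint-parent\n"
 * is stripped to "noprint-parent", the two-byte "no" prefix sets neg,
 * and "print-parent" is then looked up and cleared.
 */
static inline void example_parse_option(void)
{
	char buf[] = "  noprint-parent\n";
	char *cmp = strstrip(buf);			/* "noprint-parent" */
	int len = str_has_prefix(cmp, "no");		/* 2                */
	int neg = len ? 1 : 0;

	cmp += len;					/* "print-parent"   */
	(void)cmp;
	(void)neg;
}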
4794 
4795 static void __init apply_trace_boot_options(void)
4796 {
4797 	char *buf = trace_boot_options_buf;
4798 	char *option;
4799 
4800 	while (true) {
4801 		option = strsep(&buf, ",");
4802 
4803 		if (!option)
4804 			break;
4805 
4806 		if (*option)
4807 			trace_set_options(&global_trace, option);
4808 
4809 		/* Put back the comma to allow this to be called again */
4810 		if (buf)
4811 			*(buf - 1) = ',';
4812 	}
4813 }
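
/*
 * Tiny self-contained illustration of the comma restore above; the
 * option names are placeholders.  strsep() cuts the string in place,
 * so writing ',' back at buf - 1 leaves the boot buffer intact for a
 * later pass.
 */
static inline void example_strsep_restore(void)
{
	char opts[] = "opt1,opt2";
	char *buf = opts;
	char *option = strsep(&buf, ",");	/* opts: "opt1\0opt2"       */

	if (buf)
		*(buf - 1) = ',';		/* opts: "opt1,opt2" again  */
	(void)option;
}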
4814 
4815 static ssize_t
4816 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4817 			size_t cnt, loff_t *ppos)
4818 {
4819 	struct seq_file *m = filp->private_data;
4820 	struct trace_array *tr = m->private;
4821 	char buf[64];
4822 	int ret;
4823 
4824 	if (cnt >= sizeof(buf))
4825 		return -EINVAL;
4826 
4827 	if (copy_from_user(buf, ubuf, cnt))
4828 		return -EFAULT;
4829 
4830 	buf[cnt] = 0;
4831 
4832 	ret = trace_set_options(tr, buf);
4833 	if (ret < 0)
4834 		return ret;
4835 
4836 	*ppos += cnt;
4837 
4838 	return cnt;
4839 }
4840 
4841 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4842 {
4843 	struct trace_array *tr = inode->i_private;
4844 	int ret;
4845 
4846 	ret = tracing_check_open_get_tr(tr);
4847 	if (ret)
4848 		return ret;
4849 
4850 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4851 	if (ret < 0)
4852 		trace_array_put(tr);
4853 
4854 	return ret;
4855 }
4856 
4857 static const struct file_operations tracing_iter_fops = {
4858 	.open		= tracing_trace_options_open,
4859 	.read		= seq_read,
4860 	.llseek		= seq_lseek,
4861 	.release	= tracing_single_release_tr,
4862 	.write		= tracing_trace_options_write,
4863 };
4864 
4865 static const char readme_msg[] =
4866 	"tracing mini-HOWTO:\n\n"
4867 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4868 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4869 	" Important files:\n"
4870 	"  trace\t\t\t- The static contents of the buffer\n"
4871 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4872 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4873 	"  current_tracer\t- function and latency tracers\n"
4874 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4875 	"  error_log\t- error log for failed commands (that support it)\n"
4876 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4877 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4878 	"  trace_clock\t\t-change the clock used to order events\n"
4879 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4880 	"      global:   Synced across CPUs but slows tracing down.\n"
4881 	"     counter:   Not a clock, but just an increment\n"
4882 	"      uptime:   Jiffy counter from time of boot\n"
4883 	"        perf:   Same clock that perf events use\n"
4884 #ifdef CONFIG_X86_64
4885 	"     x86-tsc:   TSC cycle counter\n"
4886 #endif
4887 	"\n  timestamp_mode\t-view the mode used to timestamp events\n"
4888 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4889 	"    absolute:   Absolute (standalone) timestamp\n"
4890 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4891 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4892 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4893 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4894 	"\t\t\t  Remove sub-buffer with rmdir\n"
4895 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4896 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4897 	"\t\t\t  option name\n"
4898 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4899 #ifdef CONFIG_DYNAMIC_FTRACE
4900 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4901 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4902 	"\t\t\t  functions\n"
4903 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4904 	"\t     modules: Can select a group via module\n"
4905 	"\t      Format: :mod:<module-name>\n"
4906 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4907 	"\t    triggers: a command to perform when function is hit\n"
4908 	"\t      Format: <function>:<trigger>[:count]\n"
4909 	"\t     trigger: traceon, traceoff\n"
4910 	"\t\t      enable_event:<system>:<event>\n"
4911 	"\t\t      disable_event:<system>:<event>\n"
4912 #ifdef CONFIG_STACKTRACE
4913 	"\t\t      stacktrace\n"
4914 #endif
4915 #ifdef CONFIG_TRACER_SNAPSHOT
4916 	"\t\t      snapshot\n"
4917 #endif
4918 	"\t\t      dump\n"
4919 	"\t\t      cpudump\n"
4920 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4921 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4922 	"\t     The first one will disable tracing every time do_fault is hit\n"
4923 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4924 	"\t       The first time do_trap is hit and it disables tracing, the\n"
4925 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4926 	"\t       the counter will not decrement. It only decrements when the\n"
4927 	"\t       trigger did work\n"
4928 	"\t     To remove trigger without count:\n"
4929 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4930 	"\t     To remove trigger with a count:\n"
4931 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4932 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4933 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4934 	"\t    modules: Can select a group via module command :mod:\n"
4935 	"\t    Does not accept triggers\n"
4936 #endif /* CONFIG_DYNAMIC_FTRACE */
4937 #ifdef CONFIG_FUNCTION_TRACER
4938 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4939 	"\t\t    (function)\n"
4940 #endif
4941 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4942 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4943 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4944 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4945 #endif
4946 #ifdef CONFIG_TRACER_SNAPSHOT
4947 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4948 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4949 	"\t\t\t  information\n"
4950 #endif
4951 #ifdef CONFIG_STACK_TRACER
4952 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4953 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4954 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4955 	"\t\t\t  new trace)\n"
4956 #ifdef CONFIG_DYNAMIC_FTRACE
4957 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4958 	"\t\t\t  traces\n"
4959 #endif
4960 #endif /* CONFIG_STACK_TRACER */
4961 #ifdef CONFIG_DYNAMIC_EVENTS
4962 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4963 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4964 #endif
4965 #ifdef CONFIG_KPROBE_EVENTS
4966 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4967 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4968 #endif
4969 #ifdef CONFIG_UPROBE_EVENTS
4970 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4971 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4972 #endif
4973 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4974 	"\t  accepts: event-definitions (one definition per line)\n"
4975 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4976 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4977 #ifdef CONFIG_HIST_TRIGGERS
4978 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
4979 #endif
4980 	"\t           -:[<group>/]<event>\n"
4981 #ifdef CONFIG_KPROBE_EVENTS
4982 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4983   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4984 #endif
4985 #ifdef CONFIG_UPROBE_EVENTS
4986   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4987 #endif
4988 	"\t     args: <name>=fetcharg[:type]\n"
4989 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4990 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4991 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4992 #else
4993 	"\t           $stack<index>, $stack, $retval, $comm,\n"
4994 #endif
4995 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
4996 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4997 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4998 	"\t           <type>\\[<array-size>\\]\n"
4999 #ifdef CONFIG_HIST_TRIGGERS
5000 	"\t    field: <stype> <name>;\n"
5001 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5002 	"\t           [unsigned] char/int/long\n"
5003 #endif
5004 #endif
5005 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5006 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5007 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5008 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5009 	"\t\t\t  events\n"
5010 	"      filter\t\t- If set, only events passing filter are traced\n"
5011 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5012 	"\t\t\t  <event>:\n"
5013 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5014 	"      filter\t\t- If set, only events passing filter are traced\n"
5015 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5016 	"\t    Format: <trigger>[:count][if <filter>]\n"
5017 	"\t   trigger: traceon, traceoff\n"
5018 	"\t            enable_event:<system>:<event>\n"
5019 	"\t            disable_event:<system>:<event>\n"
5020 #ifdef CONFIG_HIST_TRIGGERS
5021 	"\t            enable_hist:<system>:<event>\n"
5022 	"\t            disable_hist:<system>:<event>\n"
5023 #endif
5024 #ifdef CONFIG_STACKTRACE
5025 	"\t\t    stacktrace\n"
5026 #endif
5027 #ifdef CONFIG_TRACER_SNAPSHOT
5028 	"\t\t    snapshot\n"
5029 #endif
5030 #ifdef CONFIG_HIST_TRIGGERS
5031 	"\t\t    hist (see below)\n"
5032 #endif
5033 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5034 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5035 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5036 	"\t                  events/block/block_unplug/trigger\n"
5037 	"\t   The first disables tracing every time block_unplug is hit.\n"
5038 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5039 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5040 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5041 	"\t   Like function triggers, the counter is only decremented if it\n"
5042 	"\t    enabled or disabled tracing.\n"
5043 	"\t   To remove a trigger without a count:\n"
5044 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5045 	"\t   To remove a trigger with a count:\n"
5046 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5047 	"\t   Filters can be ignored when removing a trigger.\n"
5048 #ifdef CONFIG_HIST_TRIGGERS
5049 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5050 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5051 	"\t            [:values=<field1[,field2,...]>]\n"
5052 	"\t            [:sort=<field1[,field2,...]>]\n"
5053 	"\t            [:size=#entries]\n"
5054 	"\t            [:pause][:continue][:clear]\n"
5055 	"\t            [:name=histname1]\n"
5056 	"\t            [:<handler>.<action>]\n"
5057 	"\t            [if <filter>]\n\n"
5058 	"\t    Note, special fields can be used as well:\n"
5059 	"\t            common_timestamp - to record current timestamp\n"
5060 	"\t            common_cpu - to record the CPU the event happened on\n"
5061 	"\n"
5062 	"\t    When a matching event is hit, an entry is added to a hash\n"
5063 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5064 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5065 	"\t    correspond to fields in the event's format description.  Keys\n"
5066 	"\t    can be any field, or the special string 'stacktrace'.\n"
5067 	"\t    Compound keys consisting of up to two fields can be specified\n"
5068 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5069 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5070 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5071 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5072 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5073 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5074 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5075 	"\t    its histogram data will be shared with other triggers of the\n"
5076 	"\t    same name, and trigger hits will update this common data.\n\n"
5077 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5078 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5079 	"\t    triggers attached to an event, there will be a table for each\n"
5080 	"\t    trigger in the output.  The table displayed for a named\n"
5081 	"\t    trigger will be the same as any other instance having the\n"
5082 	"\t    same name.  The default format used to display a given field\n"
5083 	"\t    can be modified by appending any of the following modifiers\n"
5084 	"\t    to the field name, as applicable:\n\n"
5085 	"\t            .hex        display a number as a hex value\n"
5086 	"\t            .sym        display an address as a symbol\n"
5087 	"\t            .sym-offset display an address as a symbol and offset\n"
5088 	"\t            .execname   display a common_pid as a program name\n"
5089 	"\t            .syscall    display a syscall id as a syscall name\n"
5090 	"\t            .log2       display log2 value rather than raw number\n"
5091 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5092 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5093 	"\t    trigger or to start a hist trigger but not log any events\n"
5094 	"\t    until told to do so.  'continue' can be used to start or\n"
5095 	"\t    restart a paused hist trigger.\n\n"
5096 	"\t    The 'clear' parameter will clear the contents of a running\n"
5097 	"\t    hist trigger and leave its current paused/active state\n"
5098 	"\t    unchanged.\n\n"
5099 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5100 	"\t    have one event conditionally start and stop another event's\n"
5101 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5102 	"\t    the enable_event and disable_event triggers.\n\n"
5103 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5104 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5105 	"\t        <handler>.<action>\n\n"
5106 	"\t    The available handlers are:\n\n"
5107 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5108 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5109 	"\t        onchange(var)            - invoke action if var changes\n\n"
5110 	"\t    The available actions are:\n\n"
5111 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5112 	"\t        save(field,...)                      - save current event fields\n"
5113 #ifdef CONFIG_TRACER_SNAPSHOT
5114 	"\t        snapshot()                           - snapshot the trace buffer\n"
5115 #endif
5116 #endif
5117 ;
5118 
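/*
 * Illustrative example of the hist trigger syntax documented above
 * (the event and fields are only an example; see
 * Documentation/trace/histogram.rst for the authoritative reference):
 *
 *   echo 'hist:keys=common_pid.execname:vals=bytes_req:sort=bytes_req.descending' \
 *         > events/kmem/kmalloc/trigger
 *
 * The resulting table is then read back from the event's "hist" file.
 */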
5119 static ssize_t
5120 tracing_readme_read(struct file *filp, char __user *ubuf,
5121 		       size_t cnt, loff_t *ppos)
5122 {
5123 	return simple_read_from_buffer(ubuf, cnt, ppos,
5124 					readme_msg, strlen(readme_msg));
5125 }
5126 
5127 static const struct file_operations tracing_readme_fops = {
5128 	.open		= tracing_open_generic,
5129 	.read		= tracing_readme_read,
5130 	.llseek		= generic_file_llseek,
5131 };
5132 
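/*
 * The saved_tgids seq_file iterators below use the sequence position
 * directly as a PID: trace_find_tgid_ptr() returns a pointer into
 * tgid_map for that PID, and saved_tgids_show() recovers the PID from
 * the pointer's offset within the map.  Entries with a zero TGID are
 * skipped via SEQ_SKIP.
 */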
5133 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5134 {
5135 	int pid = ++(*pos);
5136 
5137 	return trace_find_tgid_ptr(pid);
5138 }
5139 
5140 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5141 {
5142 	int pid = *pos;
5143 
5144 	return trace_find_tgid_ptr(pid);
5145 }
5146 
5147 static void saved_tgids_stop(struct seq_file *m, void *v)
5148 {
5149 }
5150 
5151 static int saved_tgids_show(struct seq_file *m, void *v)
5152 {
5153 	int *entry = (int *)v;
5154 	int pid = entry - tgid_map;
5155 	int tgid = *entry;
5156 
5157 	if (tgid == 0)
5158 		return SEQ_SKIP;
5159 
5160 	seq_printf(m, "%d %d\n", pid, tgid);
5161 	return 0;
5162 }
5163 
5164 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5165 	.start		= saved_tgids_start,
5166 	.stop		= saved_tgids_stop,
5167 	.next		= saved_tgids_next,
5168 	.show		= saved_tgids_show,
5169 };
5170 
5171 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5172 {
5173 	int ret;
5174 
5175 	ret = tracing_check_open_get_tr(NULL);
5176 	if (ret)
5177 		return ret;
5178 
5179 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5180 }
5181 
5182 
5183 static const struct file_operations tracing_saved_tgids_fops = {
5184 	.open		= tracing_saved_tgids_open,
5185 	.read		= seq_read,
5186 	.llseek		= seq_lseek,
5187 	.release	= seq_release,
5188 };
5189 
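/*
 * Walk savedcmd->map_cmdline_to_pid[], skipping slots that hold no
 * recorded PID (-1 or NO_CMDLINE_MAP).  The start/stop callbacks below
 * take trace_cmdline_lock with preemption disabled, so the map cannot
 * change while it is being dumped.
 */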
5190 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5191 {
5192 	unsigned int *ptr = v;
5193 
5194 	if (*pos || m->count)
5195 		ptr++;
5196 
5197 	(*pos)++;
5198 
5199 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5200 	     ptr++) {
5201 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5202 			continue;
5203 
5204 		return ptr;
5205 	}
5206 
5207 	return NULL;
5208 }
5209 
5210 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5211 {
5212 	void *v;
5213 	loff_t l = 0;
5214 
5215 	preempt_disable();
5216 	arch_spin_lock(&trace_cmdline_lock);
5217 
5218 	v = &savedcmd->map_cmdline_to_pid[0];
5219 	while (l <= *pos) {
5220 		v = saved_cmdlines_next(m, v, &l);
5221 		if (!v)
5222 			return NULL;
5223 	}
5224 
5225 	return v;
5226 }
5227 
5228 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5229 {
5230 	arch_spin_unlock(&trace_cmdline_lock);
5231 	preempt_enable();
5232 }
5233 
5234 static int saved_cmdlines_show(struct seq_file *m, void *v)
5235 {
5236 	char buf[TASK_COMM_LEN];
5237 	unsigned int *pid = v;
5238 
5239 	__trace_find_cmdline(*pid, buf);
5240 	seq_printf(m, "%d %s\n", *pid, buf);
5241 	return 0;
5242 }
5243 
5244 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5245 	.start		= saved_cmdlines_start,
5246 	.next		= saved_cmdlines_next,
5247 	.stop		= saved_cmdlines_stop,
5248 	.show		= saved_cmdlines_show,
5249 };
5250 
5251 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5252 {
5253 	int ret;
5254 
5255 	ret = tracing_check_open_get_tr(NULL);
5256 	if (ret)
5257 		return ret;
5258 
5259 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5260 }
5261 
5262 static const struct file_operations tracing_saved_cmdlines_fops = {
5263 	.open		= tracing_saved_cmdlines_open,
5264 	.read		= seq_read,
5265 	.llseek		= seq_lseek,
5266 	.release	= seq_release,
5267 };
5268 
5269 static ssize_t
5270 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5271 				 size_t cnt, loff_t *ppos)
5272 {
5273 	char buf[64];
5274 	int r;
5275 
5276 	preempt_disable();
5277 	arch_spin_lock(&trace_cmdline_lock);
5278 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5279 	arch_spin_unlock(&trace_cmdline_lock);
5280 	preempt_enable();
5281 
5282 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5283 }
5284 
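/*
 * Replace the global savedcmd buffer with a freshly allocated one of
 * @val entries.  The pointer swap is done under trace_cmdline_lock so
 * readers never see a half-initialized buffer; the old buffer is freed
 * only after the lock has been dropped.
 */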
5285 static int tracing_resize_saved_cmdlines(unsigned int val)
5286 {
5287 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5288 
5289 	s = allocate_cmdlines_buffer(val);
5290 	if (!s)
5291 		return -ENOMEM;
5292 
5293 	preempt_disable();
5294 	arch_spin_lock(&trace_cmdline_lock);
5295 	savedcmd_temp = savedcmd;
5296 	savedcmd = s;
5297 	arch_spin_unlock(&trace_cmdline_lock);
5298 	preempt_enable();
5299 	free_saved_cmdlines_buffer(savedcmd_temp);
5300 
5301 	return 0;
5302 }
5303 
5304 static ssize_t
5305 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5306 				  size_t cnt, loff_t *ppos)
5307 {
5308 	unsigned long val;
5309 	int ret;
5310 
5311 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5312 	if (ret)
5313 		return ret;
5314 
5315 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5316 	if (!val || val > PID_MAX_DEFAULT)
5317 		return -EINVAL;
5318 
5319 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5320 	if (ret < 0)
5321 		return ret;
5322 
5323 	*ppos += cnt;
5324 
5325 	return cnt;
5326 }
5327 
5328 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5329 	.open		= tracing_open_generic,
5330 	.read		= tracing_saved_cmdlines_size_read,
5331 	.write		= tracing_saved_cmdlines_size_write,
5332 };
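/*
 * Hedged usage note: these handlers back the "saved_cmdlines_size"
 * tracefs file, so (assuming the usual mount point) something like
 *
 *     echo 8192 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * resizes the comm cache to 8192 entries (valid range 1..PID_MAX_DEFAULT).
 */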
5333 
5334 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5335 static union trace_eval_map_item *
5336 update_eval_map(union trace_eval_map_item *ptr)
5337 {
5338 	if (!ptr->map.eval_string) {
5339 		if (ptr->tail.next) {
5340 			ptr = ptr->tail.next;
5341 			/* Set ptr to the next real item (skip head) */
5342 			ptr++;
5343 		} else
5344 			return NULL;
5345 	}
5346 	return ptr;
5347 }
5348 
5349 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5350 {
5351 	union trace_eval_map_item *ptr = v;
5352 
5353 	/*
5354 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5355 	 * This really should never happen.
5356 	 */
5357 	ptr = update_eval_map(ptr);
5358 	if (WARN_ON_ONCE(!ptr))
5359 		return NULL;
5360 
5361 	ptr++;
5362 
5363 	(*pos)++;
5364 
5365 	ptr = update_eval_map(ptr);
5366 
5367 	return ptr;
5368 }
5369 
5370 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5371 {
5372 	union trace_eval_map_item *v;
5373 	loff_t l = 0;
5374 
5375 	mutex_lock(&trace_eval_mutex);
5376 
5377 	v = trace_eval_maps;
5378 	if (v)
5379 		v++;
5380 
5381 	while (v && l < *pos) {
5382 		v = eval_map_next(m, v, &l);
5383 	}
5384 
5385 	return v;
5386 }
5387 
5388 static void eval_map_stop(struct seq_file *m, void *v)
5389 {
5390 	mutex_unlock(&trace_eval_mutex);
5391 }
5392 
5393 static int eval_map_show(struct seq_file *m, void *v)
5394 {
5395 	union trace_eval_map_item *ptr = v;
5396 
5397 	seq_printf(m, "%s %ld (%s)\n",
5398 		   ptr->map.eval_string, ptr->map.eval_value,
5399 		   ptr->map.system);
5400 
5401 	return 0;
5402 }
5403 
5404 static const struct seq_operations tracing_eval_map_seq_ops = {
5405 	.start		= eval_map_start,
5406 	.next		= eval_map_next,
5407 	.stop		= eval_map_stop,
5408 	.show		= eval_map_show,
5409 };
5410 
5411 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5412 {
5413 	int ret;
5414 
5415 	ret = tracing_check_open_get_tr(NULL);
5416 	if (ret)
5417 		return ret;
5418 
5419 	return seq_open(filp, &tracing_eval_map_seq_ops);
5420 }
5421 
5422 static const struct file_operations tracing_eval_map_fops = {
5423 	.open		= tracing_eval_map_open,
5424 	.read		= seq_read,
5425 	.llseek		= seq_lseek,
5426 	.release	= seq_release,
5427 };
5428 
5429 static inline union trace_eval_map_item *
5430 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5431 {
5432 	/* Return tail of array given the head */
5433 	return ptr + ptr->head.length + 1;
5434 }
5435 
5436 static void
5437 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5438 			   int len)
5439 {
5440 	struct trace_eval_map **stop;
5441 	struct trace_eval_map **map;
5442 	union trace_eval_map_item *map_array;
5443 	union trace_eval_map_item *ptr;
5444 
5445 	stop = start + len;
5446 
5447 	/*
5448 	 * The trace_eval_maps contains the map plus a head and tail item,
5449 	 * where the head holds the module and length of array, and the
5450 	 * tail holds a pointer to the next list.
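	 *
	 * The array allocated below is therefore laid out as:
	 *
	 *   map_array[0]           head (mod, length = len)
	 *   map_array[1 .. len]    copies of the trace_eval_map entries
	 *   map_array[len + 1]     zeroed tail, terminating or linking the list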
5451 	 */
5452 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5453 	if (!map_array) {
5454 		pr_warn("Unable to allocate trace eval mapping\n");
5455 		return;
5456 	}
5457 
5458 	mutex_lock(&trace_eval_mutex);
5459 
5460 	if (!trace_eval_maps)
5461 		trace_eval_maps = map_array;
5462 	else {
5463 		ptr = trace_eval_maps;
5464 		for (;;) {
5465 			ptr = trace_eval_jmp_to_tail(ptr);
5466 			if (!ptr->tail.next)
5467 				break;
5468 			ptr = ptr->tail.next;
5469 
5470 		}
5471 		ptr->tail.next = map_array;
5472 	}
5473 	map_array->head.mod = mod;
5474 	map_array->head.length = len;
5475 	map_array++;
5476 
5477 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5478 		map_array->map = **map;
5479 		map_array++;
5480 	}
5481 	memset(map_array, 0, sizeof(*map_array));
5482 
5483 	mutex_unlock(&trace_eval_mutex);
5484 }
5485 
5486 static void trace_create_eval_file(struct dentry *d_tracer)
5487 {
5488 	trace_create_file("eval_map", 0444, d_tracer,
5489 			  NULL, &tracing_eval_map_fops);
5490 }
5491 
5492 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5493 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5494 static inline void trace_insert_eval_map_file(struct module *mod,
5495 			      struct trace_eval_map **start, int len) { }
5496 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5497 
5498 static void trace_insert_eval_map(struct module *mod,
5499 				  struct trace_eval_map **start, int len)
5500 {
5501 	struct trace_eval_map **map;
5502 
5503 	if (len <= 0)
5504 		return;
5505 
5506 	map = start;
5507 
5508 	trace_event_eval_update(map, len);
5509 
5510 	trace_insert_eval_map_file(mod, start, len);
5511 }
5512 
5513 static ssize_t
5514 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5515 		       size_t cnt, loff_t *ppos)
5516 {
5517 	struct trace_array *tr = filp->private_data;
5518 	char buf[MAX_TRACER_SIZE+2];
5519 	int r;
5520 
5521 	mutex_lock(&trace_types_lock);
5522 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5523 	mutex_unlock(&trace_types_lock);
5524 
5525 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5526 }
5527 
5528 int tracer_init(struct tracer *t, struct trace_array *tr)
5529 {
5530 	tracing_reset_online_cpus(&tr->trace_buffer);
5531 	return t->init(tr);
5532 }
5533 
5534 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5535 {
5536 	int cpu;
5537 
5538 	for_each_tracing_cpu(cpu)
5539 		per_cpu_ptr(buf->data, cpu)->entries = val;
5540 }
5541 
5542 #ifdef CONFIG_TRACER_MAX_TRACE
5543 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5544 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5545 					struct trace_buffer *size_buf, int cpu_id)
5546 {
5547 	int cpu, ret = 0;
5548 
5549 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5550 		for_each_tracing_cpu(cpu) {
5551 			ret = ring_buffer_resize(trace_buf->buffer,
5552 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5553 			if (ret < 0)
5554 				break;
5555 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5556 				per_cpu_ptr(size_buf->data, cpu)->entries;
5557 		}
5558 	} else {
5559 		ret = ring_buffer_resize(trace_buf->buffer,
5560 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5561 		if (ret == 0)
5562 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5563 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5564 	}
5565 
5566 	return ret;
5567 }
5568 #endif /* CONFIG_TRACER_MAX_TRACE */
5569 
5570 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5571 					unsigned long size, int cpu)
5572 {
5573 	int ret;
5574 
5575 	/*
5576 	 * If kernel or user changes the size of the ring buffer
5577 	 * we use the size that was given, and we can forget about
5578 	 * expanding it later.
5579 	 */
5580 	ring_buffer_expanded = true;
5581 
5582 	/* May be called before buffers are initialized */
5583 	if (!tr->trace_buffer.buffer)
5584 		return 0;
5585 
5586 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5587 	if (ret < 0)
5588 		return ret;
5589 
5590 #ifdef CONFIG_TRACER_MAX_TRACE
5591 	if (!tr->current_trace->use_max_tr)
5592 		goto out;
5593 
5594 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5595 	if (ret < 0) {
5596 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5597 						     &tr->trace_buffer, cpu);
5598 		if (r < 0) {
5599 			/*
5600 			 * AARGH! We are left with different
5601 			 * size max buffer!!!!
5602 			 * The max buffer is our "snapshot" buffer.
5603 			 * When a tracer needs a snapshot (one of the
5604 			 * latency tracers), it swaps the max buffer
5605 			 * with the saved snapshot. We succeeded in
5606 			 * updating the size of the main buffer, but failed to
5607 			 * update the size of the max buffer. But when we tried
5608 			 * to reset the main buffer to the original size, we
5609 			 * failed there too. This is very unlikely to
5610 			 * happen, but if it does, warn and kill all
5611 			 * tracing.
5612 			 */
5613 			WARN_ON(1);
5614 			tracing_disabled = 1;
5615 		}
5616 		return ret;
5617 	}
5618 
5619 	if (cpu == RING_BUFFER_ALL_CPUS)
5620 		set_buffer_entries(&tr->max_buffer, size);
5621 	else
5622 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5623 
5624  out:
5625 #endif /* CONFIG_TRACER_MAX_TRACE */
5626 
5627 	if (cpu == RING_BUFFER_ALL_CPUS)
5628 		set_buffer_entries(&tr->trace_buffer, size);
5629 	else
5630 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5631 
5632 	return ret;
5633 }
5634 
5635 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5636 					  unsigned long size, int cpu_id)
5637 {
5638 	int ret = size;
5639 
5640 	mutex_lock(&trace_types_lock);
5641 
5642 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5643 		/* make sure this CPU is enabled in the mask */
5644 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5645 			ret = -EINVAL;
5646 			goto out;
5647 		}
5648 	}
5649 
5650 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5651 	if (ret < 0)
5652 		ret = -ENOMEM;
5653 
5654 out:
5655 	mutex_unlock(&trace_types_lock);
5656 
5657 	return ret;
5658 }
5659 
5660 
5661 /**
5662  * tracing_update_buffers - used by tracing facility to expand ring buffers
5663  *
5664  * To save memory when tracing is never used on a system that has it
5665  * configured in, the ring buffers are set to a minimum size.  But once
5666  * a user starts to use the tracing facility, they need to grow
5667  * to their default size.
5668  *
5669  * This function is to be called when a tracer is about to be used.
5670  */
5671 int tracing_update_buffers(void)
5672 {
5673 	int ret = 0;
5674 
5675 	mutex_lock(&trace_types_lock);
5676 	if (!ring_buffer_expanded)
5677 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5678 						RING_BUFFER_ALL_CPUS);
5679 	mutex_unlock(&trace_types_lock);
5680 
5681 	return ret;
5682 }
5683 
5684 struct trace_option_dentry;
5685 
5686 static void
5687 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5688 
5689 /*
5690  * Used to clear out the tracer before deletion of an instance.
5691  * Must have trace_types_lock held.
5692  */
5693 static void tracing_set_nop(struct trace_array *tr)
5694 {
5695 	if (tr->current_trace == &nop_trace)
5696 		return;
5697 
5698 	tr->current_trace->enabled--;
5699 
5700 	if (tr->current_trace->reset)
5701 		tr->current_trace->reset(tr);
5702 
5703 	tr->current_trace = &nop_trace;
5704 }
5705 
5706 static bool tracer_options_updated;
5707 
5708 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5709 {
5710 	/* Only enable if the directory has been created already. */
5711 	if (!tr->dir)
5712 		return;
5713 
5714 	/* Only create trace option files after update_tracer_options finishes */
5715 	if (!tracer_options_updated)
5716 		return;
5717 
5718 	create_trace_option_files(tr, t);
5719 }
5720 
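/*
 * Switch the current tracer of @tr to the tracer named @buf.  The ring
 * buffer is expanded if needed, the old tracer is reset and replaced by
 * nop_trace, the snapshot (max) buffer is freed or allocated to match
 * the new tracer's use_max_tr requirement, and finally the new tracer's
 * init() callback is run.  Takes trace_types_lock itself.
 */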
5721 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5722 {
5723 	struct tracer *t;
5724 #ifdef CONFIG_TRACER_MAX_TRACE
5725 	bool had_max_tr;
5726 #endif
5727 	int ret = 0;
5728 
5729 	mutex_lock(&trace_types_lock);
5730 
5731 	if (!ring_buffer_expanded) {
5732 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5733 						RING_BUFFER_ALL_CPUS);
5734 		if (ret < 0)
5735 			goto out;
5736 		ret = 0;
5737 	}
5738 
5739 	for (t = trace_types; t; t = t->next) {
5740 		if (strcmp(t->name, buf) == 0)
5741 			break;
5742 	}
5743 	if (!t) {
5744 		ret = -EINVAL;
5745 		goto out;
5746 	}
5747 	if (t == tr->current_trace)
5748 		goto out;
5749 
5750 #ifdef CONFIG_TRACER_SNAPSHOT
5751 	if (t->use_max_tr) {
5752 		local_irq_disable();
5753 		arch_spin_lock(&tr->max_lock);
5754 		if (tr->cond_snapshot)
5755 			ret = -EBUSY;
5756 		arch_spin_unlock(&tr->max_lock);
5757 		local_irq_enable();
5758 		if (ret)
5759 			goto out;
5760 	}
5761 #endif
5762 	/* Some tracers won't work on kernel command line */
5763 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5764 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5765 			t->name);
5766 		goto out;
5767 	}
5768 
5769 	/* Some tracers are only allowed for the top level buffer */
5770 	if (!trace_ok_for_array(t, tr)) {
5771 		ret = -EINVAL;
5772 		goto out;
5773 	}
5774 
5775 	/* If trace pipe files are being read, we can't change the tracer */
5776 	if (tr->current_trace->ref) {
5777 		ret = -EBUSY;
5778 		goto out;
5779 	}
5780 
5781 	trace_branch_disable();
5782 
5783 	tr->current_trace->enabled--;
5784 
5785 	if (tr->current_trace->reset)
5786 		tr->current_trace->reset(tr);
5787 
5788 	/* Current trace needs to be nop_trace before synchronize_rcu */
5789 	tr->current_trace = &nop_trace;
5790 
5791 #ifdef CONFIG_TRACER_MAX_TRACE
5792 	had_max_tr = tr->allocated_snapshot;
5793 
5794 	if (had_max_tr && !t->use_max_tr) {
5795 		/*
5796 		 * We need to make sure that the update_max_tr sees that
5797 		 * current_trace changed to nop_trace to keep it from
5798 		 * swapping the buffers after we resize it.
5799 		 * The update_max_tr is called with interrupts disabled,
5800 		 * so a synchronize_rcu() is sufficient.
5801 		 */
5802 		synchronize_rcu();
5803 		free_snapshot(tr);
5804 	}
5805 #endif
5806 
5807 #ifdef CONFIG_TRACER_MAX_TRACE
5808 	if (t->use_max_tr && !had_max_tr) {
5809 		ret = tracing_alloc_snapshot_instance(tr);
5810 		if (ret < 0)
5811 			goto out;
5812 	}
5813 #endif
5814 
5815 	if (t->init) {
5816 		ret = tracer_init(t, tr);
5817 		if (ret)
5818 			goto out;
5819 	}
5820 
5821 	tr->current_trace = t;
5822 	tr->current_trace->enabled++;
5823 	trace_branch_enable(tr);
5824  out:
5825 	mutex_unlock(&trace_types_lock);
5826 
5827 	return ret;
5828 }
5829 
5830 static ssize_t
5831 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5832 			size_t cnt, loff_t *ppos)
5833 {
5834 	struct trace_array *tr = filp->private_data;
5835 	char buf[MAX_TRACER_SIZE+1];
5836 	int i;
5837 	size_t ret;
5838 	int err;
5839 
5840 	ret = cnt;
5841 
5842 	if (cnt > MAX_TRACER_SIZE)
5843 		cnt = MAX_TRACER_SIZE;
5844 
5845 	if (copy_from_user(buf, ubuf, cnt))
5846 		return -EFAULT;
5847 
5848 	buf[cnt] = 0;
5849 
5850 	/* strip ending whitespace. */
5851 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5852 		buf[i] = 0;
5853 
5854 	err = tracing_set_tracer(tr, buf);
5855 	if (err)
5856 		return err;
5857 
5858 	*ppos += ret;
5859 
5860 	return ret;
5861 }
5862 
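/*
 * The tracing_nsecs_read()/write() helpers below store nanoseconds
 * internally but expose microseconds to user space: reads convert with
 * nsecs_to_usecs() (printing -1 for the "unset" value), while writes
 * multiply the parsed value by 1000.
 */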
5863 static ssize_t
5864 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5865 		   size_t cnt, loff_t *ppos)
5866 {
5867 	char buf[64];
5868 	int r;
5869 
5870 	r = snprintf(buf, sizeof(buf), "%ld\n",
5871 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5872 	if (r > sizeof(buf))
5873 		r = sizeof(buf);
5874 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5875 }
5876 
5877 static ssize_t
5878 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5879 		    size_t cnt, loff_t *ppos)
5880 {
5881 	unsigned long val;
5882 	int ret;
5883 
5884 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5885 	if (ret)
5886 		return ret;
5887 
5888 	*ptr = val * 1000;
5889 
5890 	return cnt;
5891 }
5892 
5893 static ssize_t
5894 tracing_thresh_read(struct file *filp, char __user *ubuf,
5895 		    size_t cnt, loff_t *ppos)
5896 {
5897 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5898 }
5899 
5900 static ssize_t
5901 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5902 		     size_t cnt, loff_t *ppos)
5903 {
5904 	struct trace_array *tr = filp->private_data;
5905 	int ret;
5906 
5907 	mutex_lock(&trace_types_lock);
5908 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5909 	if (ret < 0)
5910 		goto out;
5911 
5912 	if (tr->current_trace->update_thresh) {
5913 		ret = tr->current_trace->update_thresh(tr);
5914 		if (ret < 0)
5915 			goto out;
5916 	}
5917 
5918 	ret = cnt;
5919 out:
5920 	mutex_unlock(&trace_types_lock);
5921 
5922 	return ret;
5923 }
5924 
5925 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5926 
5927 static ssize_t
5928 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5929 		     size_t cnt, loff_t *ppos)
5930 {
5931 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5932 }
5933 
5934 static ssize_t
5935 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5936 		      size_t cnt, loff_t *ppos)
5937 {
5938 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5939 }
5940 
5941 #endif
5942 
5943 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5944 {
5945 	struct trace_array *tr = inode->i_private;
5946 	struct trace_iterator *iter;
5947 	int ret;
5948 
5949 	ret = tracing_check_open_get_tr(tr);
5950 	if (ret)
5951 		return ret;
5952 
5953 	mutex_lock(&trace_types_lock);
5954 
5955 	/* create a buffer to store the information to pass to userspace */
5956 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5957 	if (!iter) {
5958 		ret = -ENOMEM;
5959 		__trace_array_put(tr);
5960 		goto out;
5961 	}
5962 
5963 	trace_seq_init(&iter->seq);
5964 	iter->trace = tr->current_trace;
5965 
5966 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5967 		ret = -ENOMEM;
5968 		goto fail;
5969 	}
5970 
5971 	/* trace pipe does not show start of buffer */
5972 	cpumask_setall(iter->started);
5973 
5974 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5975 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5976 
5977 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5978 	if (trace_clocks[tr->clock_id].in_ns)
5979 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5980 
5981 	iter->tr = tr;
5982 	iter->trace_buffer = &tr->trace_buffer;
5983 	iter->cpu_file = tracing_get_cpu(inode);
5984 	mutex_init(&iter->mutex);
5985 	filp->private_data = iter;
5986 
5987 	if (iter->trace->pipe_open)
5988 		iter->trace->pipe_open(iter);
5989 
5990 	nonseekable_open(inode, filp);
5991 
5992 	tr->current_trace->ref++;
5993 out:
5994 	mutex_unlock(&trace_types_lock);
5995 	return ret;
5996 
5997 fail:
5998 	kfree(iter);
5999 	__trace_array_put(tr);
6000 	mutex_unlock(&trace_types_lock);
6001 	return ret;
6002 }
6003 
6004 static int tracing_release_pipe(struct inode *inode, struct file *file)
6005 {
6006 	struct trace_iterator *iter = file->private_data;
6007 	struct trace_array *tr = inode->i_private;
6008 
6009 	mutex_lock(&trace_types_lock);
6010 
6011 	tr->current_trace->ref--;
6012 
6013 	if (iter->trace->pipe_close)
6014 		iter->trace->pipe_close(iter);
6015 
6016 	mutex_unlock(&trace_types_lock);
6017 
6018 	free_cpumask_var(iter->started);
6019 	mutex_destroy(&iter->mutex);
6020 	kfree(iter);
6021 
6022 	trace_array_put(tr);
6023 
6024 	return 0;
6025 }
6026 
6027 static __poll_t
6028 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6029 {
6030 	struct trace_array *tr = iter->tr;
6031 
6032 	/* Iterators are static, they should be filled or empty */
6033 	if (trace_buffer_iter(iter, iter->cpu_file))
6034 		return EPOLLIN | EPOLLRDNORM;
6035 
6036 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6037 		/*
6038 		 * Always select as readable when in blocking mode
6039 		 */
6040 		return EPOLLIN | EPOLLRDNORM;
6041 	else
6042 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
6043 					     filp, poll_table);
6044 }
6045 
6046 static __poll_t
6047 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6048 {
6049 	struct trace_iterator *iter = filp->private_data;
6050 
6051 	return trace_poll(iter, filp, poll_table);
6052 }
6053 
6054 /* Must be called with iter->mutex held. */
6055 static int tracing_wait_pipe(struct file *filp)
6056 {
6057 	struct trace_iterator *iter = filp->private_data;
6058 	int ret;
6059 
6060 	while (trace_empty(iter)) {
6061 
6062 		if ((filp->f_flags & O_NONBLOCK)) {
6063 			return -EAGAIN;
6064 		}
6065 
6066 		/*
6067 		 * We block until we read something and tracing is disabled.
6068 		 * We still block if tracing is disabled, but we have never
6069 		 * read anything. This allows a user to cat this file, and
6070 		 * then enable tracing. But after we have read something,
6071 		 * we give an EOF when tracing is again disabled.
6072 		 *
6073 		 * iter->pos will be 0 if we haven't read anything.
6074 		 */
6075 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6076 			break;
6077 
6078 		mutex_unlock(&iter->mutex);
6079 
6080 		ret = wait_on_pipe(iter, 0);
6081 
6082 		mutex_lock(&iter->mutex);
6083 
6084 		if (ret)
6085 			return ret;
6086 	}
6087 
6088 	return 1;
6089 }
6090 
6091 /*
6092  * Consumer reader.
6093  */
6094 static ssize_t
6095 tracing_read_pipe(struct file *filp, char __user *ubuf,
6096 		  size_t cnt, loff_t *ppos)
6097 {
6098 	struct trace_iterator *iter = filp->private_data;
6099 	ssize_t sret;
6100 
6101 	/*
6102 	 * Avoid more than one consumer on a single file descriptor.
6103 	 * This is just a matter of trace coherency; the ring buffer itself
6104 	 * is protected.
6105 	 */
6106 	mutex_lock(&iter->mutex);
6107 
6108 	/* return any leftover data */
6109 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6110 	if (sret != -EBUSY)
6111 		goto out;
6112 
6113 	trace_seq_init(&iter->seq);
6114 
6115 	if (iter->trace->read) {
6116 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6117 		if (sret)
6118 			goto out;
6119 	}
6120 
6121 waitagain:
6122 	sret = tracing_wait_pipe(filp);
6123 	if (sret <= 0)
6124 		goto out;
6125 
6126 	/* stop when tracing is finished */
6127 	if (trace_empty(iter)) {
6128 		sret = 0;
6129 		goto out;
6130 	}
6131 
6132 	if (cnt >= PAGE_SIZE)
6133 		cnt = PAGE_SIZE - 1;
6134 
6135 	/* reset all but tr, trace, and overruns */
6136 	memset(&iter->seq, 0,
6137 	       sizeof(struct trace_iterator) -
6138 	       offsetof(struct trace_iterator, seq));
6139 	cpumask_clear(iter->started);
6140 	trace_seq_init(&iter->seq);
6141 	iter->pos = -1;
6142 
6143 	trace_event_read_lock();
6144 	trace_access_lock(iter->cpu_file);
6145 	while (trace_find_next_entry_inc(iter) != NULL) {
6146 		enum print_line_t ret;
6147 		int save_len = iter->seq.seq.len;
6148 
6149 		ret = print_trace_line(iter);
6150 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6151 			/*
6152 			 * If one print_trace_line() fills the entire trace_seq in one shot,
6153 			 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6154 			 * In this case, we need to consume it; otherwise, the loop will peek at
6155 			 * this event next time, resulting in an infinite loop.
6156 			 */
6157 			if (save_len == 0) {
6158 				iter->seq.full = 0;
6159 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6160 				trace_consume(iter);
6161 				break;
6162 			}
6163 
6164 			/* In other cases, don't print partial lines */
6165 			iter->seq.seq.len = save_len;
6166 			break;
6167 		}
6168 		if (ret != TRACE_TYPE_NO_CONSUME)
6169 			trace_consume(iter);
6170 
6171 		if (trace_seq_used(&iter->seq) >= cnt)
6172 			break;
6173 
6174 		/*
6175 		 * Setting the full flag means we reached the trace_seq buffer
6176 		 * size and we should have left via the partial-output condition above.
6177 		 * One of the trace_seq_* functions is not being used properly.
6178 		 */
6179 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6180 			  iter->ent->type);
6181 	}
6182 	trace_access_unlock(iter->cpu_file);
6183 	trace_event_read_unlock();
6184 
6185 	/* Now copy what we have to the user */
6186 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6187 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6188 		trace_seq_init(&iter->seq);
6189 
6190 	/*
6191 	 * If there was nothing to send to user, in spite of consuming trace
6192 	 * entries, go back to wait for more entries.
6193 	 */
6194 	if (sret == -EBUSY)
6195 		goto waitagain;
6196 
6197 out:
6198 	mutex_unlock(&iter->mutex);
6199 
6200 	return sret;
6201 }
6202 
6203 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6204 				     unsigned int idx)
6205 {
6206 	__free_page(spd->pages[idx]);
6207 }
6208 
6209 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6210 	.confirm		= generic_pipe_buf_confirm,
6211 	.release		= generic_pipe_buf_release,
6212 	.steal			= generic_pipe_buf_steal,
6213 	.get			= generic_pipe_buf_get,
6214 };
6215 
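/*
 * Render trace entries into iter->seq for one splice page.  Lines are
 * formatted until the page-sized seq buffer would overflow, @rem bytes
 * have been produced, or the ring buffer runs out of entries; on
 * overflow the partial line is rolled back so it can be re-emitted on
 * the next page.
 */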
6216 static size_t
6217 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6218 {
6219 	size_t count;
6220 	int save_len;
6221 	int ret;
6222 
6223 	/* Seq buffer is page-sized, exactly what we need. */
6224 	for (;;) {
6225 		save_len = iter->seq.seq.len;
6226 		ret = print_trace_line(iter);
6227 
6228 		if (trace_seq_has_overflowed(&iter->seq)) {
6229 			iter->seq.seq.len = save_len;
6230 			break;
6231 		}
6232 
6233 		/*
6234 		 * This should not be hit, because it should only
6235 		 * be set if the iter->seq overflowed. But check it
6236 		 * anyway to be safe.
6237 		 */
6238 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6239 			iter->seq.seq.len = save_len;
6240 			break;
6241 		}
6242 
6243 		count = trace_seq_used(&iter->seq) - save_len;
6244 		if (rem < count) {
6245 			rem = 0;
6246 			iter->seq.seq.len = save_len;
6247 			break;
6248 		}
6249 
6250 		if (ret != TRACE_TYPE_NO_CONSUME)
6251 			trace_consume(iter);
6252 		rem -= count;
6253 		if (!trace_find_next_entry_inc(iter))	{
6254 			rem = 0;
6255 			iter->ent = NULL;
6256 			break;
6257 		}
6258 	}
6259 
6260 	return rem;
6261 }
6262 
6263 static ssize_t tracing_splice_read_pipe(struct file *filp,
6264 					loff_t *ppos,
6265 					struct pipe_inode_info *pipe,
6266 					size_t len,
6267 					unsigned int flags)
6268 {
6269 	struct page *pages_def[PIPE_DEF_BUFFERS];
6270 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6271 	struct trace_iterator *iter = filp->private_data;
6272 	struct splice_pipe_desc spd = {
6273 		.pages		= pages_def,
6274 		.partial	= partial_def,
6275 		.nr_pages	= 0, /* This gets updated below. */
6276 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6277 		.ops		= &tracing_pipe_buf_ops,
6278 		.spd_release	= tracing_spd_release_pipe,
6279 	};
6280 	ssize_t ret;
6281 	size_t rem;
6282 	unsigned int i;
6283 
6284 	if (splice_grow_spd(pipe, &spd))
6285 		return -ENOMEM;
6286 
6287 	mutex_lock(&iter->mutex);
6288 
6289 	if (iter->trace->splice_read) {
6290 		ret = iter->trace->splice_read(iter, filp,
6291 					       ppos, pipe, len, flags);
6292 		if (ret)
6293 			goto out_err;
6294 	}
6295 
6296 	ret = tracing_wait_pipe(filp);
6297 	if (ret <= 0)
6298 		goto out_err;
6299 
6300 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6301 		ret = -EFAULT;
6302 		goto out_err;
6303 	}
6304 
6305 	trace_event_read_lock();
6306 	trace_access_lock(iter->cpu_file);
6307 
6308 	/* Fill as many pages as possible. */
6309 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6310 		spd.pages[i] = alloc_page(GFP_KERNEL);
6311 		if (!spd.pages[i])
6312 			break;
6313 
6314 		rem = tracing_fill_pipe_page(rem, iter);
6315 
6316 		/* Copy the data into the page, so we can start over. */
6317 		ret = trace_seq_to_buffer(&iter->seq,
6318 					  page_address(spd.pages[i]),
6319 					  trace_seq_used(&iter->seq));
6320 		if (ret < 0) {
6321 			__free_page(spd.pages[i]);
6322 			break;
6323 		}
6324 		spd.partial[i].offset = 0;
6325 		spd.partial[i].len = trace_seq_used(&iter->seq);
6326 
6327 		trace_seq_init(&iter->seq);
6328 	}
6329 
6330 	trace_access_unlock(iter->cpu_file);
6331 	trace_event_read_unlock();
6332 	mutex_unlock(&iter->mutex);
6333 
6334 	spd.nr_pages = i;
6335 
6336 	if (i)
6337 		ret = splice_to_pipe(pipe, &spd);
6338 	else
6339 		ret = 0;
6340 out:
6341 	splice_shrink_spd(&spd);
6342 	return ret;
6343 
6344 out_err:
6345 	mutex_unlock(&iter->mutex);
6346 	goto out;
6347 }
6348 
6349 static ssize_t
6350 tracing_entries_read(struct file *filp, char __user *ubuf,
6351 		     size_t cnt, loff_t *ppos)
6352 {
6353 	struct inode *inode = file_inode(filp);
6354 	struct trace_array *tr = inode->i_private;
6355 	int cpu = tracing_get_cpu(inode);
6356 	char buf[64];
6357 	int r = 0;
6358 	ssize_t ret;
6359 
6360 	mutex_lock(&trace_types_lock);
6361 
6362 	if (cpu == RING_BUFFER_ALL_CPUS) {
6363 		int cpu, buf_size_same;
6364 		unsigned long size;
6365 
6366 		size = 0;
6367 		buf_size_same = 1;
6368 		/* check if all cpu sizes are same */
6369 		for_each_tracing_cpu(cpu) {
6370 			/* fill in the size from first enabled cpu */
6371 			if (size == 0)
6372 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6373 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6374 				buf_size_same = 0;
6375 				break;
6376 			}
6377 		}
6378 
6379 		if (buf_size_same) {
6380 			if (!ring_buffer_expanded)
6381 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6382 					    size >> 10,
6383 					    trace_buf_size >> 10);
6384 			else
6385 				r = sprintf(buf, "%lu\n", size >> 10);
6386 		} else
6387 			r = sprintf(buf, "X\n");
6388 	} else
6389 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6390 
6391 	mutex_unlock(&trace_types_lock);
6392 
6393 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6394 	return ret;
6395 }
6396 
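/*
 * Hedged usage note: this write handler backs the "buffer_size_kb"
 * tracefs file; the value is interpreted as kilobytes per CPU, e.g.
 *
 *     echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *
 * resizes each CPU's ring buffer to roughly 4 MB (the per_cpu/cpuN
 * "buffer_size_kb" files resize a single CPU instead).
 */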
6397 static ssize_t
6398 tracing_entries_write(struct file *filp, const char __user *ubuf,
6399 		      size_t cnt, loff_t *ppos)
6400 {
6401 	struct inode *inode = file_inode(filp);
6402 	struct trace_array *tr = inode->i_private;
6403 	unsigned long val;
6404 	int ret;
6405 
6406 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6407 	if (ret)
6408 		return ret;
6409 
6410 	/* must have at least 1 entry */
6411 	if (!val)
6412 		return -EINVAL;
6413 
6414 	/* value is in KB */
6415 	val <<= 10;
6416 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6417 	if (ret < 0)
6418 		return ret;
6419 
6420 	*ppos += cnt;
6421 
6422 	return cnt;
6423 }
6424 
6425 static ssize_t
6426 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6427 				size_t cnt, loff_t *ppos)
6428 {
6429 	struct trace_array *tr = filp->private_data;
6430 	char buf[64];
6431 	int r, cpu;
6432 	unsigned long size = 0, expanded_size = 0;
6433 
6434 	mutex_lock(&trace_types_lock);
6435 	for_each_tracing_cpu(cpu) {
6436 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6437 		if (!ring_buffer_expanded)
6438 			expanded_size += trace_buf_size >> 10;
6439 	}
6440 	if (ring_buffer_expanded)
6441 		r = sprintf(buf, "%lu\n", size);
6442 	else
6443 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6444 	mutex_unlock(&trace_types_lock);
6445 
6446 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6447 }
6448 
6449 static ssize_t
6450 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6451 			  size_t cnt, loff_t *ppos)
6452 {
6453 	/*
6454 	 * There is no need to read what the user has written; this function
6455 	 * exists just to make sure that there is no error when "echo" is used.
6456 	 */
6457 
6458 	*ppos += cnt;
6459 
6460 	return cnt;
6461 }
6462 
6463 static int
6464 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6465 {
6466 	struct trace_array *tr = inode->i_private;
6467 
6468 	/* disable tracing ? */
6469 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6470 		tracer_tracing_off(tr);
6471 	/* resize the ring buffer to 0 */
6472 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6473 
6474 	trace_array_put(tr);
6475 
6476 	return 0;
6477 }
6478 
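/*
 * Hedged usage note: tracing_mark_write() backs the "trace_marker"
 * tracefs file, e.g.
 *
 *     echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *
 * injects a TRACE_PRINT event carrying that string into the ring
 * buffer, truncated to TRACE_BUF_SIZE and newline-terminated.
 */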
6479 static ssize_t
6480 tracing_mark_write(struct file *filp, const char __user *ubuf,
6481 					size_t cnt, loff_t *fpos)
6482 {
6483 	struct trace_array *tr = filp->private_data;
6484 	struct ring_buffer_event *event;
6485 	enum event_trigger_type tt = ETT_NONE;
6486 	struct ring_buffer *buffer;
6487 	struct print_entry *entry;
6488 	unsigned long irq_flags;
6489 	ssize_t written;
6490 	int size;
6491 	int len;
6492 
6493 /* Used in tracing_mark_raw_write() as well */
6494 #define FAULTED_STR "<faulted>"
6495 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6496 
6497 	if (tracing_disabled)
6498 		return -EINVAL;
6499 
6500 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6501 		return -EINVAL;
6502 
6503 	if (cnt > TRACE_BUF_SIZE)
6504 		cnt = TRACE_BUF_SIZE;
6505 
6506 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6507 
6508 	local_save_flags(irq_flags);
6509 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6510 
6511 	/* If less than "<faulted>", then make sure we can still add that */
6512 	if (cnt < FAULTED_SIZE)
6513 		size += FAULTED_SIZE - cnt;
6514 
6515 	buffer = tr->trace_buffer.buffer;
6516 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6517 					    irq_flags, preempt_count());
6518 	if (unlikely(!event))
6519 		/* Ring buffer disabled, return as if not open for write */
6520 		return -EBADF;
6521 
6522 	entry = ring_buffer_event_data(event);
6523 	entry->ip = _THIS_IP_;
6524 
6525 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6526 	if (len) {
6527 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6528 		cnt = FAULTED_SIZE;
6529 		written = -EFAULT;
6530 	} else
6531 		written = cnt;
6532 	len = cnt;
6533 
6534 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6535 		/* do not add \n before testing triggers, but add \0 */
6536 		entry->buf[cnt] = '\0';
6537 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6538 	}
6539 
6540 	if (entry->buf[cnt - 1] != '\n') {
6541 		entry->buf[cnt] = '\n';
6542 		entry->buf[cnt + 1] = '\0';
6543 	} else
6544 		entry->buf[cnt] = '\0';
6545 
6546 	__buffer_unlock_commit(buffer, event);
6547 
6548 	if (tt)
6549 		event_triggers_post_call(tr->trace_marker_file, tt);
6550 
6551 	if (written > 0)
6552 		*fpos += written;
6553 
6554 	return written;
6555 }
6556 
6557 /* Limit it for now to 3K (including tag) */
6558 #define RAW_DATA_MAX_SIZE (1024*3)
6559 
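/*
 * Raw variant (used for the "trace_marker_raw" file): the payload must
 * start with a binary tag id of sizeof(unsigned int) bytes, followed by
 * arbitrary data, capped at RAW_DATA_MAX_SIZE.
 */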
6560 static ssize_t
6561 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6562 					size_t cnt, loff_t *fpos)
6563 {
6564 	struct trace_array *tr = filp->private_data;
6565 	struct ring_buffer_event *event;
6566 	struct ring_buffer *buffer;
6567 	struct raw_data_entry *entry;
6568 	unsigned long irq_flags;
6569 	ssize_t written;
6570 	int size;
6571 	int len;
6572 
6573 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6574 
6575 	if (tracing_disabled)
6576 		return -EINVAL;
6577 
6578 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6579 		return -EINVAL;
6580 
6581 	/* The marker must at least have a tag id */
6582 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6583 		return -EINVAL;
6584 
6585 	if (cnt > TRACE_BUF_SIZE)
6586 		cnt = TRACE_BUF_SIZE;
6587 
6588 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6589 
6590 	local_save_flags(irq_flags);
6591 	size = sizeof(*entry) + cnt;
6592 	if (cnt < FAULT_SIZE_ID)
6593 		size += FAULT_SIZE_ID - cnt;
6594 
6595 	buffer = tr->trace_buffer.buffer;
6596 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6597 					    irq_flags, preempt_count());
6598 	if (!event)
6599 		/* Ring buffer disabled, return as if not open for write */
6600 		return -EBADF;
6601 
6602 	entry = ring_buffer_event_data(event);
6603 
6604 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6605 	if (len) {
6606 		entry->id = -1;
6607 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6608 		written = -EFAULT;
6609 	} else
6610 		written = cnt;
6611 
6612 	__buffer_unlock_commit(buffer, event);
6613 
6614 	if (written > 0)
6615 		*fpos += written;
6616 
6617 	return written;
6618 }
6619 
6620 static int tracing_clock_show(struct seq_file *m, void *v)
6621 {
6622 	struct trace_array *tr = m->private;
6623 	int i;
6624 
6625 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6626 		seq_printf(m,
6627 			"%s%s%s%s", i ? " " : "",
6628 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6629 			i == tr->clock_id ? "]" : "");
6630 	seq_putc(m, '\n');
6631 
6632 	return 0;
6633 }
6634 
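/*
 * Select the trace clock by name.  Through the "trace_clock" file this
 * is typically driven by e.g.
 *
 *     echo mono > /sys/kernel/tracing/trace_clock
 *
 * Both the main buffer and (if allocated) the max buffer are reset,
 * since timestamps taken with different clocks are not comparable.
 */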
6635 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6636 {
6637 	int i;
6638 
6639 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6640 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6641 			break;
6642 	}
6643 	if (i == ARRAY_SIZE(trace_clocks))
6644 		return -EINVAL;
6645 
6646 	mutex_lock(&trace_types_lock);
6647 
6648 	tr->clock_id = i;
6649 
6650 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6651 
6652 	/*
6653 	 * New clock may not be consistent with the previous clock.
6654 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6655 	 */
6656 	tracing_reset_online_cpus(&tr->trace_buffer);
6657 
6658 #ifdef CONFIG_TRACER_MAX_TRACE
6659 	if (tr->max_buffer.buffer)
6660 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6661 	tracing_reset_online_cpus(&tr->max_buffer);
6662 #endif
6663 
6664 	mutex_unlock(&trace_types_lock);
6665 
6666 	return 0;
6667 }
6668 
6669 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6670 				   size_t cnt, loff_t *fpos)
6671 {
6672 	struct seq_file *m = filp->private_data;
6673 	struct trace_array *tr = m->private;
6674 	char buf[64];
6675 	const char *clockstr;
6676 	int ret;
6677 
6678 	if (cnt >= sizeof(buf))
6679 		return -EINVAL;
6680 
6681 	if (copy_from_user(buf, ubuf, cnt))
6682 		return -EFAULT;
6683 
6684 	buf[cnt] = 0;
6685 
6686 	clockstr = strstrip(buf);
6687 
6688 	ret = tracing_set_clock(tr, clockstr);
6689 	if (ret)
6690 		return ret;
6691 
6692 	*fpos += cnt;
6693 
6694 	return cnt;
6695 }
6696 
6697 static int tracing_clock_open(struct inode *inode, struct file *file)
6698 {
6699 	struct trace_array *tr = inode->i_private;
6700 	int ret;
6701 
6702 	ret = tracing_check_open_get_tr(tr);
6703 	if (ret)
6704 		return ret;
6705 
6706 	ret = single_open(file, tracing_clock_show, inode->i_private);
6707 	if (ret < 0)
6708 		trace_array_put(tr);
6709 
6710 	return ret;
6711 }
6712 
6713 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6714 {
6715 	struct trace_array *tr = m->private;
6716 
6717 	mutex_lock(&trace_types_lock);
6718 
6719 	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6720 		seq_puts(m, "delta [absolute]\n");
6721 	else
6722 		seq_puts(m, "[delta] absolute\n");
6723 
6724 	mutex_unlock(&trace_types_lock);
6725 
6726 	return 0;
6727 }
6728 
6729 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6730 {
6731 	struct trace_array *tr = inode->i_private;
6732 	int ret;
6733 
6734 	ret = tracing_check_open_get_tr(tr);
6735 	if (ret)
6736 		return ret;
6737 
6738 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6739 	if (ret < 0)
6740 		trace_array_put(tr);
6741 
6742 	return ret;
6743 }
6744 
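/*
 * Reference-counted toggle between delta and absolute timestamps.
 * Several users (e.g. hist triggers referencing common_timestamp) may
 * request absolute timestamps; the buffers only switch back to delta
 * mode when the last reference is dropped.
 */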
6745 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6746 {
6747 	int ret = 0;
6748 
6749 	mutex_lock(&trace_types_lock);
6750 
6751 	if (abs && tr->time_stamp_abs_ref++)
6752 		goto out;
6753 
6754 	if (!abs) {
6755 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6756 			ret = -EINVAL;
6757 			goto out;
6758 		}
6759 
6760 		if (--tr->time_stamp_abs_ref)
6761 			goto out;
6762 	}
6763 
6764 	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6765 
6766 #ifdef CONFIG_TRACER_MAX_TRACE
6767 	if (tr->max_buffer.buffer)
6768 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6769 #endif
6770  out:
6771 	mutex_unlock(&trace_types_lock);
6772 
6773 	return ret;
6774 }
6775 
6776 struct ftrace_buffer_info {
6777 	struct trace_iterator	iter;
6778 	void			*spare;
6779 	unsigned int		spare_cpu;
6780 	unsigned int		read;
6781 };
6782 
6783 #ifdef CONFIG_TRACER_SNAPSHOT
6784 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6785 {
6786 	struct trace_array *tr = inode->i_private;
6787 	struct trace_iterator *iter;
6788 	struct seq_file *m;
6789 	int ret;
6790 
6791 	ret = tracing_check_open_get_tr(tr);
6792 	if (ret)
6793 		return ret;
6794 
6795 	if (file->f_mode & FMODE_READ) {
6796 		iter = __tracing_open(inode, file, true);
6797 		if (IS_ERR(iter))
6798 			ret = PTR_ERR(iter);
6799 	} else {
6800 		/* Writes still need the seq_file to hold the private data */
6801 		ret = -ENOMEM;
6802 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6803 		if (!m)
6804 			goto out;
6805 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6806 		if (!iter) {
6807 			kfree(m);
6808 			goto out;
6809 		}
6810 		ret = 0;
6811 
6812 		iter->tr = tr;
6813 		iter->trace_buffer = &tr->max_buffer;
6814 		iter->cpu_file = tracing_get_cpu(inode);
6815 		m->private = iter;
6816 		file->private_data = m;
6817 	}
6818 out:
6819 	if (ret < 0)
6820 		trace_array_put(tr);
6821 
6822 	return ret;
6823 }
6824 
6825 static void tracing_swap_cpu_buffer(void *tr)
6826 {
6827 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
6828 }
6829 
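/*
 * Writes to the "snapshot" file: '0' frees the snapshot buffer, '1'
 * allocates it if necessary and swaps it with the live buffer, and any
 * other value simply clears the snapshot buffer's contents.  Hedged
 * usage example:
 *
 *     echo 1 > /sys/kernel/tracing/snapshot     # take a snapshot
 *     cat /sys/kernel/tracing/snapshot          # read it back
 */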
6830 static ssize_t
6831 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6832 		       loff_t *ppos)
6833 {
6834 	struct seq_file *m = filp->private_data;
6835 	struct trace_iterator *iter = m->private;
6836 	struct trace_array *tr = iter->tr;
6837 	unsigned long val;
6838 	int ret;
6839 
6840 	ret = tracing_update_buffers();
6841 	if (ret < 0)
6842 		return ret;
6843 
6844 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6845 	if (ret)
6846 		return ret;
6847 
6848 	mutex_lock(&trace_types_lock);
6849 
6850 	if (tr->current_trace->use_max_tr) {
6851 		ret = -EBUSY;
6852 		goto out;
6853 	}
6854 
6855 	local_irq_disable();
6856 	arch_spin_lock(&tr->max_lock);
6857 	if (tr->cond_snapshot)
6858 		ret = -EBUSY;
6859 	arch_spin_unlock(&tr->max_lock);
6860 	local_irq_enable();
6861 	if (ret)
6862 		goto out;
6863 
6864 	switch (val) {
6865 	case 0:
6866 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6867 			ret = -EINVAL;
6868 			break;
6869 		}
6870 		if (tr->allocated_snapshot)
6871 			free_snapshot(tr);
6872 		break;
6873 	case 1:
6874 /* Only allow per-cpu swap if the ring buffer supports it */
6875 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6876 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6877 			ret = -EINVAL;
6878 			break;
6879 		}
6880 #endif
6881 		if (tr->allocated_snapshot)
6882 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
6883 					&tr->trace_buffer, iter->cpu_file);
6884 		else
6885 			ret = tracing_alloc_snapshot_instance(tr);
6886 		if (ret < 0)
6887 			break;
6888 		/* Now, we're going to swap */
6889 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
6890 			local_irq_disable();
6891 			update_max_tr(tr, current, smp_processor_id(), NULL);
6892 			local_irq_enable();
6893 		} else {
6894 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
6895 						 (void *)tr, 1);
6896 		}
6897 		break;
6898 	default:
6899 		if (tr->allocated_snapshot) {
6900 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6901 				tracing_reset_online_cpus(&tr->max_buffer);
6902 			else
6903 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6904 		}
6905 		break;
6906 	}
6907 
6908 	if (ret >= 0) {
6909 		*ppos += cnt;
6910 		ret = cnt;
6911 	}
6912 out:
6913 	mutex_unlock(&trace_types_lock);
6914 	return ret;
6915 }
6916 
6917 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6918 {
6919 	struct seq_file *m = file->private_data;
6920 	int ret;
6921 
6922 	ret = tracing_release(inode, file);
6923 
6924 	if (file->f_mode & FMODE_READ)
6925 		return ret;
6926 
6927 	/* If write only, the seq_file is just a stub */
6928 	if (m)
6929 		kfree(m->private);
6930 	kfree(m);
6931 
6932 	return 0;
6933 }
6934 
6935 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6936 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6937 				    size_t count, loff_t *ppos);
6938 static int tracing_buffers_release(struct inode *inode, struct file *file);
6939 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6940 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6941 
6942 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6943 {
6944 	struct ftrace_buffer_info *info;
6945 	int ret;
6946 
6947 	/* The following checks for tracefs lockdown */
6948 	ret = tracing_buffers_open(inode, filp);
6949 	if (ret < 0)
6950 		return ret;
6951 
6952 	info = filp->private_data;
6953 
6954 	if (info->iter.trace->use_max_tr) {
6955 		tracing_buffers_release(inode, filp);
6956 		return -EBUSY;
6957 	}
6958 
6959 	info->iter.snapshot = true;
6960 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6961 
6962 	return ret;
6963 }
6964 
6965 #endif /* CONFIG_TRACER_SNAPSHOT */
6966 
6967 
6968 static const struct file_operations tracing_thresh_fops = {
6969 	.open		= tracing_open_generic,
6970 	.read		= tracing_thresh_read,
6971 	.write		= tracing_thresh_write,
6972 	.llseek		= generic_file_llseek,
6973 };
6974 
6975 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6976 static const struct file_operations tracing_max_lat_fops = {
6977 	.open		= tracing_open_generic,
6978 	.read		= tracing_max_lat_read,
6979 	.write		= tracing_max_lat_write,
6980 	.llseek		= generic_file_llseek,
6981 };
6982 #endif
6983 
6984 static const struct file_operations set_tracer_fops = {
6985 	.open		= tracing_open_generic_tr,
6986 	.read		= tracing_set_trace_read,
6987 	.write		= tracing_set_trace_write,
6988 	.llseek		= generic_file_llseek,
6989 	.release	= tracing_release_generic_tr,
6990 };
6991 
6992 static const struct file_operations tracing_pipe_fops = {
6993 	.open		= tracing_open_pipe,
6994 	.poll		= tracing_poll_pipe,
6995 	.read		= tracing_read_pipe,
6996 	.splice_read	= tracing_splice_read_pipe,
6997 	.release	= tracing_release_pipe,
6998 	.llseek		= no_llseek,
6999 };
7000 
7001 static const struct file_operations tracing_entries_fops = {
7002 	.open		= tracing_open_generic_tr,
7003 	.read		= tracing_entries_read,
7004 	.write		= tracing_entries_write,
7005 	.llseek		= generic_file_llseek,
7006 	.release	= tracing_release_generic_tr,
7007 };
7008 
7009 static const struct file_operations tracing_total_entries_fops = {
7010 	.open		= tracing_open_generic_tr,
7011 	.read		= tracing_total_entries_read,
7012 	.llseek		= generic_file_llseek,
7013 	.release	= tracing_release_generic_tr,
7014 };
7015 
7016 static const struct file_operations tracing_free_buffer_fops = {
7017 	.open		= tracing_open_generic_tr,
7018 	.write		= tracing_free_buffer_write,
7019 	.release	= tracing_free_buffer_release,
7020 };
7021 
7022 static const struct file_operations tracing_mark_fops = {
7023 	.open		= tracing_open_generic_tr,
7024 	.write		= tracing_mark_write,
7025 	.llseek		= generic_file_llseek,
7026 	.release	= tracing_release_generic_tr,
7027 };
7028 
7029 static const struct file_operations tracing_mark_raw_fops = {
7030 	.open		= tracing_open_generic_tr,
7031 	.write		= tracing_mark_raw_write,
7032 	.llseek		= generic_file_llseek,
7033 	.release	= tracing_release_generic_tr,
7034 };
7035 
7036 static const struct file_operations trace_clock_fops = {
7037 	.open		= tracing_clock_open,
7038 	.read		= seq_read,
7039 	.llseek		= seq_lseek,
7040 	.release	= tracing_single_release_tr,
7041 	.write		= tracing_clock_write,
7042 };
7043 
7044 static const struct file_operations trace_time_stamp_mode_fops = {
7045 	.open		= tracing_time_stamp_mode_open,
7046 	.read		= seq_read,
7047 	.llseek		= seq_lseek,
7048 	.release	= tracing_single_release_tr,
7049 };
7050 
7051 #ifdef CONFIG_TRACER_SNAPSHOT
7052 static const struct file_operations snapshot_fops = {
7053 	.open		= tracing_snapshot_open,
7054 	.read		= seq_read,
7055 	.write		= tracing_snapshot_write,
7056 	.llseek		= tracing_lseek,
7057 	.release	= tracing_snapshot_release,
7058 };
7059 
7060 static const struct file_operations snapshot_raw_fops = {
7061 	.open		= snapshot_raw_open,
7062 	.read		= tracing_buffers_read,
7063 	.release	= tracing_buffers_release,
7064 	.splice_read	= tracing_buffers_splice_read,
7065 	.llseek		= no_llseek,
7066 };
7067 
7068 #endif /* CONFIG_TRACER_SNAPSHOT */
7069 
7070 #define TRACING_LOG_ERRS_MAX	8
7071 #define TRACING_LOG_LOC_MAX	128
7072 
7073 #define CMD_PREFIX "  Command: "
7074 
7075 struct err_info {
7076 	const char	**errs;	/* ptr to loc-specific array of err strings */
7077 	u8		type;	/* index into errs -> specific err string */
7078 	u8		pos;	/* caret position in cmd; fits in u8 (MAX_FILTER_STR_VAL = 256) */
7079 	u64		ts;
7080 };
7081 
7082 struct tracing_log_err {
7083 	struct list_head	list;
7084 	struct err_info		info;
7085 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7086 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7087 };
7088 
7089 static DEFINE_MUTEX(tracing_err_log_lock);
7090 
7091 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7092 {
7093 	struct tracing_log_err *err;
7094 
7095 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7096 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7097 		if (!err)
7098 			err = ERR_PTR(-ENOMEM);
7099 		else
7100 			tr->n_err_log_entries++;
7101 
7102 		return err;
7103 	}
7104 
7105 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7106 	list_del(&err->list);
7107 
7108 	return err;
7109 }
7110 
7111 /**
7112  * err_pos - find the position of a string within a command for error careting
7113  * @cmd: The tracing command that caused the error
7114  * @str: The string to position the caret at within @cmd
7115  *
7116  * Finds the position of the first occurrence of @str within @cmd.  The
7117  * return value can be passed to tracing_log_err() for caret placement
7118  * within @cmd.
7119  *
7120  * Returns the index within @cmd of the first occurrence of @str or 0
7121  * if @str was not found.
7122  */
7123 unsigned int err_pos(char *cmd, const char *str)
7124 {
7125 	char *found;
7126 
7127 	if (WARN_ON(!strlen(cmd)))
7128 		return 0;
7129 
7130 	found = strstr(cmd, str);
7131 	if (found)
7132 		return found - cmd;
7133 
7134 	return 0;
7135 }
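/*
 * A minimal sketch of how err_pos() behaves (the command strings below
 * are hypothetical, not taken from this file):
 *
 *	err_pos("keys=common_pid:ts0=common_timestamp", "ts0");
 *		returns 16, the offset of "ts0" within the command
 *	err_pos("keys=common_pid", "missing");
 *		returns 0, since "missing" is not found
 *
 * Note that a match at offset 0 and "not found" both return 0, so in
 * either case the caret ends up pointing at the start of the command.
 */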
7136 
7137 /**
7138  * tracing_log_err - write an error to the tracing error log
7139  * @tr: The associated trace array for the error (NULL for top level array)
7140  * @loc: A string describing where the error occurred
7141  * @cmd: The tracing command that caused the error
7142  * @errs: The array of loc-specific static error strings
7143  * @type: The index into errs[], which produces the specific static err string
7144  * @pos: The position the caret should be placed in the cmd
7145  *
7146  * Writes an error into tracing/error_log of the form:
7147  *
7148  * <loc>: error: <text>
7149  *   Command: <cmd>
7150  *              ^
7151  *
7152  * tracing/error_log is a small log file containing the last
7153  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7154  * unless there has been a tracing error, and the error log can be
7155  * cleared and have its memory freed by writing the empty string to
7156  * it in truncation mode, i.e. echo > tracing/error_log.
7157  *
7158  * NOTE: the @errs array along with the @type param are used to
7159  * produce a static error string - this string is not copied and saved
7160  * when the error is logged - only a pointer to it is saved.  See
7161  * existing callers for examples of how static strings are typically
7162  * defined for use with tracing_log_err().
7163  */
7164 void tracing_log_err(struct trace_array *tr,
7165 		     const char *loc, const char *cmd,
7166 		     const char **errs, u8 type, u8 pos)
7167 {
7168 	struct tracing_log_err *err;
7169 
7170 	if (!tr)
7171 		tr = &global_trace;
7172 
7173 	mutex_lock(&tracing_err_log_lock);
7174 	err = get_tracing_log_err(tr);
7175 	if (PTR_ERR(err) == -ENOMEM) {
7176 		mutex_unlock(&tracing_err_log_lock);
7177 		return;
7178 	}
7179 
7180 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7181 	snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7182 
7183 	err->info.errs = errs;
7184 	err->info.type = type;
7185 	err->info.pos = pos;
7186 	err->info.ts = local_clock();
7187 
7188 	list_add_tail(&err->list, &tr->err_log);
7189 	mutex_unlock(&tracing_err_log_lock);
7190 }
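/*
 * Illustrative sketch of a caller (the names below are hypothetical):
 * a command parser that keeps its own array of static error strings
 * and reports a bad token with a caret under it:
 *
 *	static const char *foo_errs[] = {
 *		"Invalid field name",
 *		"Duplicate field name",
 *	};
 *
 *	static void foo_report_bad_field(struct trace_array *tr,
 *					 char *cmd, const char *field)
 *	{
 *		tracing_log_err(tr, "foo_cmd", cmd, foo_errs,
 *				0, err_pos(cmd, field));
 *	}
 *
 * Only the pointer to foo_errs[] is stored (the strings are not
 * copied), so the array must be static; the command text itself is
 * copied into err->cmd.
 */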
7191 
7192 static void clear_tracing_err_log(struct trace_array *tr)
7193 {
7194 	struct tracing_log_err *err, *next;
7195 
7196 	mutex_lock(&tracing_err_log_lock);
7197 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7198 		list_del(&err->list);
7199 		kfree(err);
7200 	}
7201 
7202 	tr->n_err_log_entries = 0;
7203 	mutex_unlock(&tracing_err_log_lock);
7204 }
7205 
7206 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7207 {
7208 	struct trace_array *tr = m->private;
7209 
7210 	mutex_lock(&tracing_err_log_lock);
7211 
7212 	return seq_list_start(&tr->err_log, *pos);
7213 }
7214 
7215 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7216 {
7217 	struct trace_array *tr = m->private;
7218 
7219 	return seq_list_next(v, &tr->err_log, pos);
7220 }
7221 
7222 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7223 {
7224 	mutex_unlock(&tracing_err_log_lock);
7225 }
7226 
7227 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7228 {
7229 	u8 i;
7230 
7231 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7232 		seq_putc(m, ' ');
7233 	for (i = 0; i < pos; i++)
7234 		seq_putc(m, ' ');
7235 	seq_puts(m, "^\n");
7236 }
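/*
 * Worked example of the caret alignment (illustrative): CMD_PREFIX is
 * "  Command: ", so sizeof(CMD_PREFIX) - 1 == 11.  For pos == 4 this
 * emits 11 + 4 spaces followed by "^\n", which places the caret under
 * cmd[4] on the "  Command: <cmd>" line printed just above it.
 */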
7237 
7238 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7239 {
7240 	struct tracing_log_err *err = v;
7241 
7242 	if (err) {
7243 		const char *err_text = err->info.errs[err->info.type];
7244 		u64 sec = err->info.ts;
7245 		u32 nsec;
7246 
7247 		nsec = do_div(sec, NSEC_PER_SEC);
7248 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7249 			   err->loc, err_text);
7250 		seq_printf(m, "%s", err->cmd);
7251 		tracing_err_log_show_pos(m, err->info.pos);
7252 	}
7253 
7254 	return 0;
7255 }
7256 
7257 static const struct seq_operations tracing_err_log_seq_ops = {
7258 	.start  = tracing_err_log_seq_start,
7259 	.next   = tracing_err_log_seq_next,
7260 	.stop   = tracing_err_log_seq_stop,
7261 	.show   = tracing_err_log_seq_show
7262 };
7263 
7264 static int tracing_err_log_open(struct inode *inode, struct file *file)
7265 {
7266 	struct trace_array *tr = inode->i_private;
7267 	int ret = 0;
7268 
7269 	ret = tracing_check_open_get_tr(tr);
7270 	if (ret)
7271 		return ret;
7272 
7273 	/* If this file was opened for write, then erase contents */
7274 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7275 		clear_tracing_err_log(tr);
7276 
7277 	if (file->f_mode & FMODE_READ) {
7278 		ret = seq_open(file, &tracing_err_log_seq_ops);
7279 		if (!ret) {
7280 			struct seq_file *m = file->private_data;
7281 			m->private = tr;
7282 		} else {
7283 			trace_array_put(tr);
7284 		}
7285 	}
7286 	return ret;
7287 }
7288 
7289 static ssize_t tracing_err_log_write(struct file *file,
7290 				     const char __user *buffer,
7291 				     size_t count, loff_t *ppos)
7292 {
7293 	return count;
7294 }
7295 
7296 static int tracing_err_log_release(struct inode *inode, struct file *file)
7297 {
7298 	struct trace_array *tr = inode->i_private;
7299 
7300 	trace_array_put(tr);
7301 
7302 	if (file->f_mode & FMODE_READ)
7303 		seq_release(inode, file);
7304 
7305 	return 0;
7306 }
7307 
7308 static const struct file_operations tracing_err_log_fops = {
7309 	.open           = tracing_err_log_open,
7310 	.write		= tracing_err_log_write,
7311 	.read           = seq_read,
7312 	.llseek         = tracing_lseek,
7313 	.release        = tracing_err_log_release,
7314 };
7315 
7316 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7317 {
7318 	struct trace_array *tr = inode->i_private;
7319 	struct ftrace_buffer_info *info;
7320 	int ret;
7321 
7322 	ret = tracing_check_open_get_tr(tr);
7323 	if (ret)
7324 		return ret;
7325 
7326 	info = kzalloc(sizeof(*info), GFP_KERNEL);
7327 	if (!info) {
7328 		trace_array_put(tr);
7329 		return -ENOMEM;
7330 	}
7331 
7332 	mutex_lock(&trace_types_lock);
7333 
7334 	info->iter.tr		= tr;
7335 	info->iter.cpu_file	= tracing_get_cpu(inode);
7336 	info->iter.trace	= tr->current_trace;
7337 	info->iter.trace_buffer = &tr->trace_buffer;
7338 	info->spare		= NULL;
7339 	/* Force reading ring buffer for first read */
7340 	info->read		= (unsigned int)-1;
7341 
7342 	filp->private_data = info;
7343 
7344 	tr->current_trace->ref++;
7345 
7346 	mutex_unlock(&trace_types_lock);
7347 
7348 	ret = nonseekable_open(inode, filp);
7349 	if (ret < 0)
7350 		trace_array_put(tr);
7351 
7352 	return ret;
7353 }
7354 
7355 static __poll_t
7356 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7357 {
7358 	struct ftrace_buffer_info *info = filp->private_data;
7359 	struct trace_iterator *iter = &info->iter;
7360 
7361 	return trace_poll(iter, filp, poll_table);
7362 }
7363 
7364 static ssize_t
7365 tracing_buffers_read(struct file *filp, char __user *ubuf,
7366 		     size_t count, loff_t *ppos)
7367 {
7368 	struct ftrace_buffer_info *info = filp->private_data;
7369 	struct trace_iterator *iter = &info->iter;
7370 	ssize_t ret = 0;
7371 	ssize_t size;
7372 
7373 	if (!count)
7374 		return 0;
7375 
7376 #ifdef CONFIG_TRACER_MAX_TRACE
7377 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7378 		return -EBUSY;
7379 #endif
7380 
7381 	if (!info->spare) {
7382 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
7383 							  iter->cpu_file);
7384 		if (IS_ERR(info->spare)) {
7385 			ret = PTR_ERR(info->spare);
7386 			info->spare = NULL;
7387 		} else {
7388 			info->spare_cpu = iter->cpu_file;
7389 		}
7390 	}
7391 	if (!info->spare)
7392 		return ret;
7393 
7394 	/* Do we have previous read data to read? */
7395 	if (info->read < PAGE_SIZE)
7396 		goto read;
7397 
7398  again:
7399 	trace_access_lock(iter->cpu_file);
7400 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
7401 				    &info->spare,
7402 				    count,
7403 				    iter->cpu_file, 0);
7404 	trace_access_unlock(iter->cpu_file);
7405 
7406 	if (ret < 0) {
7407 		if (trace_empty(iter)) {
7408 			if ((filp->f_flags & O_NONBLOCK))
7409 				return -EAGAIN;
7410 
7411 			ret = wait_on_pipe(iter, 0);
7412 			if (ret)
7413 				return ret;
7414 
7415 			goto again;
7416 		}
7417 		return 0;
7418 	}
7419 
7420 	info->read = 0;
7421  read:
7422 	size = PAGE_SIZE - info->read;
7423 	if (size > count)
7424 		size = count;
7425 
7426 	ret = copy_to_user(ubuf, info->spare + info->read, size);
7427 	if (ret == size)
7428 		return -EFAULT;
7429 
7430 	size -= ret;
7431 
7432 	*ppos += size;
7433 	info->read += size;
7434 
7435 	return size;
7436 }
7437 
7438 static int tracing_buffers_release(struct inode *inode, struct file *file)
7439 {
7440 	struct ftrace_buffer_info *info = file->private_data;
7441 	struct trace_iterator *iter = &info->iter;
7442 
7443 	mutex_lock(&trace_types_lock);
7444 
7445 	iter->tr->current_trace->ref--;
7446 
7447 	__trace_array_put(iter->tr);
7448 
7449 	if (info->spare)
7450 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
7451 					   info->spare_cpu, info->spare);
7452 	kfree(info);
7453 
7454 	mutex_unlock(&trace_types_lock);
7455 
7456 	return 0;
7457 }
7458 
7459 struct buffer_ref {
7460 	struct ring_buffer	*buffer;
7461 	void			*page;
7462 	int			cpu;
7463 	refcount_t		refcount;
7464 };
7465 
7466 static void buffer_ref_release(struct buffer_ref *ref)
7467 {
7468 	if (!refcount_dec_and_test(&ref->refcount))
7469 		return;
7470 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7471 	kfree(ref);
7472 }
7473 
7474 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7475 				    struct pipe_buffer *buf)
7476 {
7477 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7478 
7479 	buffer_ref_release(ref);
7480 	buf->private = 0;
7481 }
7482 
7483 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7484 				struct pipe_buffer *buf)
7485 {
7486 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7487 
7488 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7489 		return false;
7490 
7491 	refcount_inc(&ref->refcount);
7492 	return true;
7493 }
7494 
7495 /* Pipe buffer operations for a buffer. */
7496 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7497 	.confirm		= generic_pipe_buf_confirm,
7498 	.release		= buffer_pipe_buf_release,
7499 	.steal			= generic_pipe_buf_nosteal,
7500 	.get			= buffer_pipe_buf_get,
7501 };
7502 
7503 /*
7504  * Callback from splice_to_pipe(), if we need to release some pages
7505  * at the end of the spd in case we errored out in filling the pipe.
7506  */
7507 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7508 {
7509 	struct buffer_ref *ref =
7510 		(struct buffer_ref *)spd->partial[i].private;
7511 
7512 	buffer_ref_release(ref);
7513 	spd->partial[i].private = 0;
7514 }
7515 
7516 static ssize_t
7517 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7518 			    struct pipe_inode_info *pipe, size_t len,
7519 			    unsigned int flags)
7520 {
7521 	struct ftrace_buffer_info *info = file->private_data;
7522 	struct trace_iterator *iter = &info->iter;
7523 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7524 	struct page *pages_def[PIPE_DEF_BUFFERS];
7525 	struct splice_pipe_desc spd = {
7526 		.pages		= pages_def,
7527 		.partial	= partial_def,
7528 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7529 		.ops		= &buffer_pipe_buf_ops,
7530 		.spd_release	= buffer_spd_release,
7531 	};
7532 	struct buffer_ref *ref;
7533 	int entries, i;
7534 	ssize_t ret = 0;
7535 
7536 #ifdef CONFIG_TRACER_MAX_TRACE
7537 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7538 		return -EBUSY;
7539 #endif
7540 
7541 	if (*ppos & (PAGE_SIZE - 1))
7542 		return -EINVAL;
7543 
7544 	if (len & (PAGE_SIZE - 1)) {
7545 		if (len < PAGE_SIZE)
7546 			return -EINVAL;
7547 		len &= PAGE_MASK;
7548 	}
7549 
7550 	if (splice_grow_spd(pipe, &spd))
7551 		return -ENOMEM;
7552 
7553  again:
7554 	trace_access_lock(iter->cpu_file);
7555 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7556 
7557 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7558 		struct page *page;
7559 		int r;
7560 
7561 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7562 		if (!ref) {
7563 			ret = -ENOMEM;
7564 			break;
7565 		}
7566 
7567 		refcount_set(&ref->refcount, 1);
7568 		ref->buffer = iter->trace_buffer->buffer;
7569 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7570 		if (IS_ERR(ref->page)) {
7571 			ret = PTR_ERR(ref->page);
7572 			ref->page = NULL;
7573 			kfree(ref);
7574 			break;
7575 		}
7576 		ref->cpu = iter->cpu_file;
7577 
7578 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7579 					  len, iter->cpu_file, 1);
7580 		if (r < 0) {
7581 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7582 						   ref->page);
7583 			kfree(ref);
7584 			break;
7585 		}
7586 
7587 		page = virt_to_page(ref->page);
7588 
7589 		spd.pages[i] = page;
7590 		spd.partial[i].len = PAGE_SIZE;
7591 		spd.partial[i].offset = 0;
7592 		spd.partial[i].private = (unsigned long)ref;
7593 		spd.nr_pages++;
7594 		*ppos += PAGE_SIZE;
7595 
7596 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7597 	}
7598 
7599 	trace_access_unlock(iter->cpu_file);
7600 	spd.nr_pages = i;
7601 
7602 	/* did we read anything? */
7603 	if (!spd.nr_pages) {
7604 		if (ret)
7605 			goto out;
7606 
7607 		ret = -EAGAIN;
7608 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7609 			goto out;
7610 
7611 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7612 		if (ret)
7613 			goto out;
7614 
7615 		goto again;
7616 	}
7617 
7618 	ret = splice_to_pipe(pipe, &spd);
7619 out:
7620 	splice_shrink_spd(&spd);
7621 
7622 	return ret;
7623 }
7624 
7625 static const struct file_operations tracing_buffers_fops = {
7626 	.open		= tracing_buffers_open,
7627 	.read		= tracing_buffers_read,
7628 	.poll		= tracing_buffers_poll,
7629 	.release	= tracing_buffers_release,
7630 	.splice_read	= tracing_buffers_splice_read,
7631 	.llseek		= no_llseek,
7632 };
7633 
7634 static ssize_t
7635 tracing_stats_read(struct file *filp, char __user *ubuf,
7636 		   size_t count, loff_t *ppos)
7637 {
7638 	struct inode *inode = file_inode(filp);
7639 	struct trace_array *tr = inode->i_private;
7640 	struct trace_buffer *trace_buf = &tr->trace_buffer;
7641 	int cpu = tracing_get_cpu(inode);
7642 	struct trace_seq *s;
7643 	unsigned long cnt;
7644 	unsigned long long t;
7645 	unsigned long usec_rem;
7646 
7647 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7648 	if (!s)
7649 		return -ENOMEM;
7650 
7651 	trace_seq_init(s);
7652 
7653 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7654 	trace_seq_printf(s, "entries: %ld\n", cnt);
7655 
7656 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7657 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7658 
7659 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7660 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7661 
7662 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7663 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7664 
7665 	if (trace_clocks[tr->clock_id].in_ns) {
7666 		/* local or global for trace_clock */
7667 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7668 		usec_rem = do_div(t, USEC_PER_SEC);
7669 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7670 								t, usec_rem);
7671 
7672 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7673 		usec_rem = do_div(t, USEC_PER_SEC);
7674 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7675 	} else {
7676 		/* counter or tsc mode for trace_clock */
7677 		trace_seq_printf(s, "oldest event ts: %llu\n",
7678 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7679 
7680 		trace_seq_printf(s, "now ts: %llu\n",
7681 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7682 	}
7683 
7684 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7685 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7686 
7687 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7688 	trace_seq_printf(s, "read events: %ld\n", cnt);
7689 
7690 	count = simple_read_from_buffer(ubuf, count, ppos,
7691 					s->buffer, trace_seq_used(s));
7692 
7693 	kfree(s);
7694 
7695 	return count;
7696 }
7697 
7698 static const struct file_operations tracing_stats_fops = {
7699 	.open		= tracing_open_generic_tr,
7700 	.read		= tracing_stats_read,
7701 	.llseek		= generic_file_llseek,
7702 	.release	= tracing_release_generic_tr,
7703 };
7704 
7705 #ifdef CONFIG_DYNAMIC_FTRACE
7706 
7707 static ssize_t
7708 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7709 		  size_t cnt, loff_t *ppos)
7710 {
7711 	ssize_t ret;
7712 	char *buf;
7713 	int r;
7714 
7715 	/* 256 should be plenty to hold the amount needed */
7716 	buf = kmalloc(256, GFP_KERNEL);
7717 	if (!buf)
7718 		return -ENOMEM;
7719 
7720 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7721 		      ftrace_update_tot_cnt,
7722 		      ftrace_number_of_pages,
7723 		      ftrace_number_of_groups);
7724 
7725 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7726 	kfree(buf);
7727 	return ret;
7728 }
7729 
7730 static const struct file_operations tracing_dyn_info_fops = {
7731 	.open		= tracing_open_generic,
7732 	.read		= tracing_read_dyn_info,
7733 	.llseek		= generic_file_llseek,
7734 };
7735 #endif /* CONFIG_DYNAMIC_FTRACE */
7736 
7737 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7738 static void
7739 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7740 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7741 		void *data)
7742 {
7743 	tracing_snapshot_instance(tr);
7744 }
7745 
7746 static void
7747 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7748 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7749 		      void *data)
7750 {
7751 	struct ftrace_func_mapper *mapper = data;
7752 	long *count = NULL;
7753 
7754 	if (mapper)
7755 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7756 
7757 	if (count) {
7758 
7759 		if (*count <= 0)
7760 			return;
7761 
7762 		(*count)--;
7763 	}
7764 
7765 	tracing_snapshot_instance(tr);
7766 }
7767 
7768 static int
7769 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7770 		      struct ftrace_probe_ops *ops, void *data)
7771 {
7772 	struct ftrace_func_mapper *mapper = data;
7773 	long *count = NULL;
7774 
7775 	seq_printf(m, "%ps:", (void *)ip);
7776 
7777 	seq_puts(m, "snapshot");
7778 
7779 	if (mapper)
7780 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7781 
7782 	if (count)
7783 		seq_printf(m, ":count=%ld\n", *count);
7784 	else
7785 		seq_puts(m, ":unlimited\n");
7786 
7787 	return 0;
7788 }
7789 
7790 static int
7791 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7792 		     unsigned long ip, void *init_data, void **data)
7793 {
7794 	struct ftrace_func_mapper *mapper = *data;
7795 
7796 	if (!mapper) {
7797 		mapper = allocate_ftrace_func_mapper();
7798 		if (!mapper)
7799 			return -ENOMEM;
7800 		*data = mapper;
7801 	}
7802 
7803 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7804 }
7805 
7806 static void
7807 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7808 		     unsigned long ip, void *data)
7809 {
7810 	struct ftrace_func_mapper *mapper = data;
7811 
7812 	if (!ip) {
7813 		if (!mapper)
7814 			return;
7815 		free_ftrace_func_mapper(mapper, NULL);
7816 		return;
7817 	}
7818 
7819 	ftrace_func_mapper_remove_ip(mapper, ip);
7820 }
7821 
7822 static struct ftrace_probe_ops snapshot_probe_ops = {
7823 	.func			= ftrace_snapshot,
7824 	.print			= ftrace_snapshot_print,
7825 };
7826 
7827 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7828 	.func			= ftrace_count_snapshot,
7829 	.print			= ftrace_snapshot_print,
7830 	.init			= ftrace_snapshot_init,
7831 	.free			= ftrace_snapshot_free,
7832 };
7833 
7834 static int
7835 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7836 			       char *glob, char *cmd, char *param, int enable)
7837 {
7838 	struct ftrace_probe_ops *ops;
7839 	void *count = (void *)-1;
7840 	char *number;
7841 	int ret;
7842 
7843 	if (!tr)
7844 		return -ENODEV;
7845 
7846 	/* hash funcs only work with set_ftrace_filter */
7847 	if (!enable)
7848 		return -EINVAL;
7849 
7850 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7851 
7852 	if (glob[0] == '!')
7853 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7854 
7855 	if (!param)
7856 		goto out_reg;
7857 
7858 	number = strsep(&param, ":");
7859 
7860 	if (!strlen(number))
7861 		goto out_reg;
7862 
7863 	/*
7864 	 * We use the callback data field (which is a pointer)
7865 	 * as our counter.
7866 	 */
7867 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7868 	if (ret)
7869 		return ret;
7870 
7871  out_reg:
7872 	ret = tracing_alloc_snapshot_instance(tr);
7873 	if (ret < 0)
7874 		goto out;
7875 
7876 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7877 
7878  out:
7879 	return ret < 0 ? ret : 0;
7880 }
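/*
 * Usage sketch for the "snapshot" command registered below (the
 * function name and current directory are illustrative; the writes go
 * to set_ftrace_filter in the tracefs directory):
 *
 *	# snapshot on every call to schedule()
 *	echo 'schedule:snapshot' > set_ftrace_filter
 *
 *	# snapshot only the next 3 calls, using the probe's counter
 *	echo 'schedule:snapshot:3' > set_ftrace_filter
 *
 *	# remove the probe again
 *	echo '!schedule:snapshot' > set_ftrace_filter
 *
 * With no count the probe uses snapshot_probe_ops; with a count it
 * uses snapshot_count_probe_ops, whose ftrace_count_snapshot()
 * decrements the per-ip counter and stops snapshotting at zero.
 */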
7881 
7882 static struct ftrace_func_command ftrace_snapshot_cmd = {
7883 	.name			= "snapshot",
7884 	.func			= ftrace_trace_snapshot_callback,
7885 };
7886 
7887 static __init int register_snapshot_cmd(void)
7888 {
7889 	return register_ftrace_command(&ftrace_snapshot_cmd);
7890 }
7891 #else
7892 static inline __init int register_snapshot_cmd(void) { return 0; }
7893 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7894 
7895 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7896 {
7897 	if (WARN_ON(!tr->dir))
7898 		return ERR_PTR(-ENODEV);
7899 
7900 	/* Top directory uses NULL as the parent */
7901 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7902 		return NULL;
7903 
7904 	/* All sub buffers have a descriptor */
7905 	return tr->dir;
7906 }
7907 
7908 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7909 {
7910 	struct dentry *d_tracer;
7911 
7912 	if (tr->percpu_dir)
7913 		return tr->percpu_dir;
7914 
7915 	d_tracer = tracing_get_dentry(tr);
7916 	if (IS_ERR(d_tracer))
7917 		return NULL;
7918 
7919 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7920 
7921 	WARN_ONCE(!tr->percpu_dir,
7922 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7923 
7924 	return tr->percpu_dir;
7925 }
7926 
7927 static struct dentry *
7928 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7929 		      void *data, long cpu, const struct file_operations *fops)
7930 {
7931 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7932 
7933 	if (ret) /* See tracing_get_cpu() */
7934 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7935 	return ret;
7936 }
7937 
7938 static void
7939 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7940 {
7941 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7942 	struct dentry *d_cpu;
7943 	char cpu_dir[30]; /* 30 characters should be more than enough */
7944 
7945 	if (!d_percpu)
7946 		return;
7947 
7948 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7949 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7950 	if (!d_cpu) {
7951 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7952 		return;
7953 	}
7954 
7955 	/* per cpu trace_pipe */
7956 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7957 				tr, cpu, &tracing_pipe_fops);
7958 
7959 	/* per cpu trace */
7960 	trace_create_cpu_file("trace", 0644, d_cpu,
7961 				tr, cpu, &tracing_fops);
7962 
7963 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7964 				tr, cpu, &tracing_buffers_fops);
7965 
7966 	trace_create_cpu_file("stats", 0444, d_cpu,
7967 				tr, cpu, &tracing_stats_fops);
7968 
7969 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7970 				tr, cpu, &tracing_entries_fops);
7971 
7972 #ifdef CONFIG_TRACER_SNAPSHOT
7973 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7974 				tr, cpu, &snapshot_fops);
7975 
7976 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7977 				tr, cpu, &snapshot_raw_fops);
7978 #endif
7979 }
7980 
7981 #ifdef CONFIG_FTRACE_SELFTEST
7982 /* Let selftest have access to static functions in this file */
7983 #include "trace_selftest.c"
7984 #endif
7985 
7986 static ssize_t
trace_options_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7987 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7988 			loff_t *ppos)
7989 {
7990 	struct trace_option_dentry *topt = filp->private_data;
7991 	char *buf;
7992 
7993 	if (topt->flags->val & topt->opt->bit)
7994 		buf = "1\n";
7995 	else
7996 		buf = "0\n";
7997 
7998 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7999 }
8000 
8001 static ssize_t
8002 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8003 			 loff_t *ppos)
8004 {
8005 	struct trace_option_dentry *topt = filp->private_data;
8006 	unsigned long val;
8007 	int ret;
8008 
8009 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8010 	if (ret)
8011 		return ret;
8012 
8013 	if (val != 0 && val != 1)
8014 		return -EINVAL;
8015 
8016 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8017 		mutex_lock(&trace_types_lock);
8018 		ret = __set_tracer_option(topt->tr, topt->flags,
8019 					  topt->opt, !val);
8020 		mutex_unlock(&trace_types_lock);
8021 		if (ret)
8022 			return ret;
8023 	}
8024 
8025 	*ppos += cnt;
8026 
8027 	return cnt;
8028 }
8029 
8030 static int tracing_open_options(struct inode *inode, struct file *filp)
8031 {
8032 	struct trace_option_dentry *topt = inode->i_private;
8033 	int ret;
8034 
8035 	ret = tracing_check_open_get_tr(topt->tr);
8036 	if (ret)
8037 		return ret;
8038 
8039 	filp->private_data = inode->i_private;
8040 	return 0;
8041 }
8042 
8043 static int tracing_release_options(struct inode *inode, struct file *file)
8044 {
8045 	struct trace_option_dentry *topt = file->private_data;
8046 
8047 	trace_array_put(topt->tr);
8048 	return 0;
8049 }
8050 
8051 static const struct file_operations trace_options_fops = {
8052 	.open = tracing_open_options,
8053 	.read = trace_options_read,
8054 	.write = trace_options_write,
8055 	.llseek	= generic_file_llseek,
8056 	.release = tracing_release_options,
8057 };
8058 
8059 /*
8060  * In order to pass in both the trace_array descriptor as well as the index
8061  * to the flag that the trace option file represents, the trace_array
8062  * has a character array of trace_flags_index[], which holds the index
8063  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8064  * The address of this character array is passed to the flag option file
8065  * read/write callbacks.
8066  *
8067  * In order to extract both the index and the trace_array descriptor,
8068  * get_tr_index() uses the following algorithm.
8069  *
8070  *   idx = *ptr;
8071  *
8072  * As the pointer itself contains the address of the index (remember
8073  * index[1] == 1).
8074  *
8075  * Then to get the trace_array descriptor, by subtracting that index
8076  * from the ptr, we get to the start of the index itself.
8077  *
8078  *   ptr - idx == &index[0]
8079  *
8080  * Then a simple container_of() from that pointer gets us to the
8081  * trace_array descriptor.
8082  */
8083 static void get_tr_index(void *data, struct trace_array **ptr,
8084 			 unsigned int *pindex)
8085 {
8086 	*pindex = *(unsigned char *)data;
8087 
8088 	*ptr = container_of(data - *pindex, struct trace_array,
8089 			    trace_flags_index);
8090 }
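/*
 * Worked example (hypothetical values): if a trace option file was
 * created with data == &tr->trace_flags_index[3], then
 *
 *	*pindex == tr->trace_flags_index[3] == 3
 *	data - 3 == &tr->trace_flags_index[0]
 *
 * and container_of() on that address recovers the enclosing
 * struct trace_array, so both the flag index and its trace_array are
 * derived from a single pointer.
 */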
8091 
8092 static ssize_t
8093 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8094 			loff_t *ppos)
8095 {
8096 	void *tr_index = filp->private_data;
8097 	struct trace_array *tr;
8098 	unsigned int index;
8099 	char *buf;
8100 
8101 	get_tr_index(tr_index, &tr, &index);
8102 
8103 	if (tr->trace_flags & (1 << index))
8104 		buf = "1\n";
8105 	else
8106 		buf = "0\n";
8107 
8108 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8109 }
8110 
8111 static ssize_t
8112 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8113 			 loff_t *ppos)
8114 {
8115 	void *tr_index = filp->private_data;
8116 	struct trace_array *tr;
8117 	unsigned int index;
8118 	unsigned long val;
8119 	int ret;
8120 
8121 	get_tr_index(tr_index, &tr, &index);
8122 
8123 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8124 	if (ret)
8125 		return ret;
8126 
8127 	if (val != 0 && val != 1)
8128 		return -EINVAL;
8129 
8130 	mutex_lock(&event_mutex);
8131 	mutex_lock(&trace_types_lock);
8132 	ret = set_tracer_flag(tr, 1 << index, val);
8133 	mutex_unlock(&trace_types_lock);
8134 	mutex_unlock(&event_mutex);
8135 
8136 	if (ret < 0)
8137 		return ret;
8138 
8139 	*ppos += cnt;
8140 
8141 	return cnt;
8142 }
8143 
8144 static const struct file_operations trace_options_core_fops = {
8145 	.open = tracing_open_generic,
8146 	.read = trace_options_core_read,
8147 	.write = trace_options_core_write,
8148 	.llseek = generic_file_llseek,
8149 };
8150 
8151 struct dentry *trace_create_file(const char *name,
8152 				 umode_t mode,
8153 				 struct dentry *parent,
8154 				 void *data,
8155 				 const struct file_operations *fops)
8156 {
8157 	struct dentry *ret;
8158 
8159 	ret = tracefs_create_file(name, mode, parent, data, fops);
8160 	if (!ret)
8161 		pr_warn("Could not create tracefs '%s' entry\n", name);
8162 
8163 	return ret;
8164 }
8165 
8166 
8167 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8168 {
8169 	struct dentry *d_tracer;
8170 
8171 	if (tr->options)
8172 		return tr->options;
8173 
8174 	d_tracer = tracing_get_dentry(tr);
8175 	if (IS_ERR(d_tracer))
8176 		return NULL;
8177 
8178 	tr->options = tracefs_create_dir("options", d_tracer);
8179 	if (!tr->options) {
8180 		pr_warn("Could not create tracefs directory 'options'\n");
8181 		return NULL;
8182 	}
8183 
8184 	return tr->options;
8185 }
8186 
8187 static void
8188 create_trace_option_file(struct trace_array *tr,
8189 			 struct trace_option_dentry *topt,
8190 			 struct tracer_flags *flags,
8191 			 struct tracer_opt *opt)
8192 {
8193 	struct dentry *t_options;
8194 
8195 	t_options = trace_options_init_dentry(tr);
8196 	if (!t_options)
8197 		return;
8198 
8199 	topt->flags = flags;
8200 	topt->opt = opt;
8201 	topt->tr = tr;
8202 
8203 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8204 				    &trace_options_fops);
8205 
8206 }
8207 
8208 static void
8209 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8210 {
8211 	struct trace_option_dentry *topts;
8212 	struct trace_options *tr_topts;
8213 	struct tracer_flags *flags;
8214 	struct tracer_opt *opts;
8215 	int cnt;
8216 	int i;
8217 
8218 	if (!tracer)
8219 		return;
8220 
8221 	flags = tracer->flags;
8222 
8223 	if (!flags || !flags->opts)
8224 		return;
8225 
8226 	/*
8227 	 * If this is an instance, only create flags for tracers
8228 	 * the instance may have.
8229 	 */
8230 	if (!trace_ok_for_array(tracer, tr))
8231 		return;
8232 
8233 	for (i = 0; i < tr->nr_topts; i++) {
8234 		/* Make sure there are no duplicate flags. */
8235 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8236 			return;
8237 	}
8238 
8239 	opts = flags->opts;
8240 
8241 	for (cnt = 0; opts[cnt].name; cnt++)
8242 		;
8243 
8244 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8245 	if (!topts)
8246 		return;
8247 
8248 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8249 			    GFP_KERNEL);
8250 	if (!tr_topts) {
8251 		kfree(topts);
8252 		return;
8253 	}
8254 
8255 	tr->topts = tr_topts;
8256 	tr->topts[tr->nr_topts].tracer = tracer;
8257 	tr->topts[tr->nr_topts].topts = topts;
8258 	tr->nr_topts++;
8259 
8260 	for (cnt = 0; opts[cnt].name; cnt++) {
8261 		create_trace_option_file(tr, &topts[cnt], flags,
8262 					 &opts[cnt]);
8263 		WARN_ONCE(topts[cnt].entry == NULL,
8264 			  "Failed to create trace option: %s",
8265 			  opts[cnt].name);
8266 	}
8267 }
8268 
8269 static struct dentry *
8270 create_trace_option_core_file(struct trace_array *tr,
8271 			      const char *option, long index)
8272 {
8273 	struct dentry *t_options;
8274 
8275 	t_options = trace_options_init_dentry(tr);
8276 	if (!t_options)
8277 		return NULL;
8278 
8279 	return trace_create_file(option, 0644, t_options,
8280 				 (void *)&tr->trace_flags_index[index],
8281 				 &trace_options_core_fops);
8282 }
8283 
8284 static void create_trace_options_dir(struct trace_array *tr)
8285 {
8286 	struct dentry *t_options;
8287 	bool top_level = tr == &global_trace;
8288 	int i;
8289 
8290 	t_options = trace_options_init_dentry(tr);
8291 	if (!t_options)
8292 		return;
8293 
8294 	for (i = 0; trace_options[i]; i++) {
8295 		if (top_level ||
8296 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8297 			create_trace_option_core_file(tr, trace_options[i], i);
8298 	}
8299 }
8300 
8301 static ssize_t
rb_simple_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8302 rb_simple_read(struct file *filp, char __user *ubuf,
8303 	       size_t cnt, loff_t *ppos)
8304 {
8305 	struct trace_array *tr = filp->private_data;
8306 	char buf[64];
8307 	int r;
8308 
8309 	r = tracer_tracing_is_on(tr);
8310 	r = sprintf(buf, "%d\n", r);
8311 
8312 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8313 }
8314 
8315 static ssize_t
rb_simple_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)8316 rb_simple_write(struct file *filp, const char __user *ubuf,
8317 		size_t cnt, loff_t *ppos)
8318 {
8319 	struct trace_array *tr = filp->private_data;
8320 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
8321 	unsigned long val;
8322 	int ret;
8323 
8324 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8325 	if (ret)
8326 		return ret;
8327 
8328 	if (buffer) {
8329 		mutex_lock(&trace_types_lock);
8330 		if (!!val == tracer_tracing_is_on(tr)) {
8331 			val = 0; /* do nothing */
8332 		} else if (val) {
8333 			tracer_tracing_on(tr);
8334 			if (tr->current_trace->start)
8335 				tr->current_trace->start(tr);
8336 		} else {
8337 			tracer_tracing_off(tr);
8338 			if (tr->current_trace->stop)
8339 				tr->current_trace->stop(tr);
8340 		}
8341 		mutex_unlock(&trace_types_lock);
8342 	}
8343 
8344 	(*ppos)++;
8345 
8346 	return cnt;
8347 }
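/*
 * rb_simple_write() backs the "tracing_on" file; a minimal usage
 * sketch (the mount point /sys/kernel/tracing is assumed):
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on	# stop writing to the ring buffer
 *	echo 1 > /sys/kernel/tracing/tracing_on	# resume
 *
 * Writing the value that is already in effect is a no-op; the current
 * tracer's start()/stop() callbacks run only when the state changes.
 */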
8348 
8349 static const struct file_operations rb_simple_fops = {
8350 	.open		= tracing_open_generic_tr,
8351 	.read		= rb_simple_read,
8352 	.write		= rb_simple_write,
8353 	.release	= tracing_release_generic_tr,
8354 	.llseek		= default_llseek,
8355 };
8356 
8357 static ssize_t
8358 buffer_percent_read(struct file *filp, char __user *ubuf,
8359 		    size_t cnt, loff_t *ppos)
8360 {
8361 	struct trace_array *tr = filp->private_data;
8362 	char buf[64];
8363 	int r;
8364 
8365 	r = tr->buffer_percent;
8366 	r = sprintf(buf, "%d\n", r);
8367 
8368 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8369 }
8370 
8371 static ssize_t
8372 buffer_percent_write(struct file *filp, const char __user *ubuf,
8373 		     size_t cnt, loff_t *ppos)
8374 {
8375 	struct trace_array *tr = filp->private_data;
8376 	unsigned long val;
8377 	int ret;
8378 
8379 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8380 	if (ret)
8381 		return ret;
8382 
8383 	if (val > 100)
8384 		return -EINVAL;
8385 
8386 	tr->buffer_percent = val;
8387 
8388 	(*ppos)++;
8389 
8390 	return cnt;
8391 }
8392 
8393 static const struct file_operations buffer_percent_fops = {
8394 	.open		= tracing_open_generic_tr,
8395 	.read		= buffer_percent_read,
8396 	.write		= buffer_percent_write,
8397 	.release	= tracing_release_generic_tr,
8398 	.llseek		= default_llseek,
8399 };
8400 
8401 static struct dentry *trace_instance_dir;
8402 
8403 static void
8404 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8405 
8406 static int
8407 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
8408 {
8409 	enum ring_buffer_flags rb_flags;
8410 
8411 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8412 
8413 	buf->tr = tr;
8414 
8415 	buf->buffer = ring_buffer_alloc(size, rb_flags);
8416 	if (!buf->buffer)
8417 		return -ENOMEM;
8418 
8419 	buf->data = alloc_percpu(struct trace_array_cpu);
8420 	if (!buf->data) {
8421 		ring_buffer_free(buf->buffer);
8422 		buf->buffer = NULL;
8423 		return -ENOMEM;
8424 	}
8425 
8426 	/* Allocate the first page for all buffers */
8427 	set_buffer_entries(&tr->trace_buffer,
8428 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
8429 
8430 	return 0;
8431 }
8432 
8433 static int allocate_trace_buffers(struct trace_array *tr, int size)
8434 {
8435 	int ret;
8436 
8437 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
8438 	if (ret)
8439 		return ret;
8440 
8441 #ifdef CONFIG_TRACER_MAX_TRACE
8442 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
8443 				    allocate_snapshot ? size : 1);
8444 	if (WARN_ON(ret)) {
8445 		ring_buffer_free(tr->trace_buffer.buffer);
8446 		tr->trace_buffer.buffer = NULL;
8447 		free_percpu(tr->trace_buffer.data);
8448 		tr->trace_buffer.data = NULL;
8449 		return -ENOMEM;
8450 	}
8451 	tr->allocated_snapshot = allocate_snapshot;
8452 
8453 	/*
8454 	 * Only the top level trace array gets its snapshot allocated
8455 	 * from the kernel command line.
8456 	 */
8457 	allocate_snapshot = false;
8458 #endif
8459 
8460 	/*
8461 	 * Because of some magic with the way alloc_percpu() works on
8462 	 * x86_64, we need to synchronize the pgd of all the tables,
8463 	 * otherwise the trace events that happen in x86_64 page fault
8464 	 * handlers can't cope with accessing alloc_percpu()'d memory that
8465 	 * might be touched in the page fault trace event. Oh, and we need
8466 	 * to audit all other alloc_percpu() and vmalloc()
8467 	 * calls in tracing, because something might get triggered within a
8468 	 * page fault trace event!
8469 	 */
8470 	vmalloc_sync_mappings();
8471 
8472 	return 0;
8473 }
8474 
8475 static void free_trace_buffer(struct trace_buffer *buf)
8476 {
8477 	if (buf->buffer) {
8478 		ring_buffer_free(buf->buffer);
8479 		buf->buffer = NULL;
8480 		free_percpu(buf->data);
8481 		buf->data = NULL;
8482 	}
8483 }
8484 
8485 static void free_trace_buffers(struct trace_array *tr)
8486 {
8487 	if (!tr)
8488 		return;
8489 
8490 	free_trace_buffer(&tr->trace_buffer);
8491 
8492 #ifdef CONFIG_TRACER_MAX_TRACE
8493 	free_trace_buffer(&tr->max_buffer);
8494 #endif
8495 }
8496 
8497 static void init_trace_flags_index(struct trace_array *tr)
8498 {
8499 	int i;
8500 
8501 	/* Used by the trace options files */
8502 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8503 		tr->trace_flags_index[i] = i;
8504 }
8505 
8506 static void __update_tracer_options(struct trace_array *tr)
8507 {
8508 	struct tracer *t;
8509 
8510 	for (t = trace_types; t; t = t->next)
8511 		add_tracer_options(tr, t);
8512 }
8513 
8514 static void update_tracer_options(struct trace_array *tr)
8515 {
8516 	mutex_lock(&trace_types_lock);
8517 	tracer_options_updated = true;
8518 	__update_tracer_options(tr);
8519 	mutex_unlock(&trace_types_lock);
8520 }
8521 
8522 struct trace_array *trace_array_create(const char *name)
8523 {
8524 	struct trace_array *tr;
8525 	int ret;
8526 
8527 	mutex_lock(&event_mutex);
8528 	mutex_lock(&trace_types_lock);
8529 
8530 	ret = -EEXIST;
8531 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8532 		if (tr->name && strcmp(tr->name, name) == 0)
8533 			goto out_unlock;
8534 	}
8535 
8536 	ret = -ENOMEM;
8537 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8538 	if (!tr)
8539 		goto out_unlock;
8540 
8541 	tr->name = kstrdup(name, GFP_KERNEL);
8542 	if (!tr->name)
8543 		goto out_free_tr;
8544 
8545 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8546 		goto out_free_tr;
8547 
8548 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8549 
8550 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8551 
8552 	raw_spin_lock_init(&tr->start_lock);
8553 
8554 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8555 
8556 	tr->current_trace = &nop_trace;
8557 
8558 	INIT_LIST_HEAD(&tr->systems);
8559 	INIT_LIST_HEAD(&tr->events);
8560 	INIT_LIST_HEAD(&tr->hist_vars);
8561 	INIT_LIST_HEAD(&tr->err_log);
8562 
8563 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8564 		goto out_free_tr;
8565 
8566 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
8567 	if (!tr->dir)
8568 		goto out_free_tr;
8569 
8570 	ret = event_trace_add_tracer(tr->dir, tr);
8571 	if (ret) {
8572 		tracefs_remove_recursive(tr->dir);
8573 		goto out_free_tr;
8574 	}
8575 
8576 	ftrace_init_trace_array(tr);
8577 
8578 	init_tracer_tracefs(tr, tr->dir);
8579 	init_trace_flags_index(tr);
8580 	__update_tracer_options(tr);
8581 
8582 	list_add(&tr->list, &ftrace_trace_arrays);
8583 
8584 	mutex_unlock(&trace_types_lock);
8585 	mutex_unlock(&event_mutex);
8586 
8587 	return tr;
8588 
8589  out_free_tr:
8590 	free_trace_buffers(tr);
8591 	free_cpumask_var(tr->tracing_cpumask);
8592 	kfree(tr->name);
8593 	kfree(tr);
8594 
8595  out_unlock:
8596 	mutex_unlock(&trace_types_lock);
8597 	mutex_unlock(&event_mutex);
8598 
8599 	return ERR_PTR(ret);
8600 }
8601 EXPORT_SYMBOL_GPL(trace_array_create);
8602 
8603 static int instance_mkdir(const char *name)
8604 {
8605 	return PTR_ERR_OR_ZERO(trace_array_create(name));
8606 }
8607 
8608 static int __remove_instance(struct trace_array *tr)
8609 {
8610 	int i;
8611 
8612 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
8613 		return -EBUSY;
8614 
8615 	list_del(&tr->list);
8616 
8617 	/* Disable all the flags that were enabled coming in */
8618 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8619 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8620 			set_tracer_flag(tr, 1 << i, 0);
8621 	}
8622 
8623 	tracing_set_nop(tr);
8624 	clear_ftrace_function_probes(tr);
8625 	event_trace_del_tracer(tr);
8626 	ftrace_clear_pids(tr);
8627 	ftrace_destroy_function_files(tr);
8628 	tracefs_remove_recursive(tr->dir);
8629 	free_trace_buffers(tr);
8630 	clear_tracing_err_log(tr);
8631 
8632 	for (i = 0; i < tr->nr_topts; i++) {
8633 		kfree(tr->topts[i].topts);
8634 	}
8635 	kfree(tr->topts);
8636 
8637 	free_cpumask_var(tr->tracing_cpumask);
8638 	kfree(tr->name);
8639 	kfree(tr);
8640 	tr = NULL;
8641 
8642 	return 0;
8643 }
8644 
8645 int trace_array_destroy(struct trace_array *this_tr)
8646 {
8647 	struct trace_array *tr;
8648 	int ret;
8649 
8650 	if (!this_tr)
8651 		return -EINVAL;
8652 
8653 	mutex_lock(&event_mutex);
8654 	mutex_lock(&trace_types_lock);
8655 
8656 	ret = -ENODEV;
8657 
8658 	/* Making sure trace array exists before destroying it. */
8659 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8660 		if (tr == this_tr) {
8661 			ret = __remove_instance(tr);
8662 			break;
8663 		}
8664 	}
8665 
8666 	mutex_unlock(&trace_types_lock);
8667 	mutex_unlock(&event_mutex);
8668 
8669 	return ret;
8670 }
8671 EXPORT_SYMBOL_GPL(trace_array_destroy);
8672 
8673 static int instance_rmdir(const char *name)
8674 {
8675 	struct trace_array *tr;
8676 	int ret;
8677 
8678 	mutex_lock(&event_mutex);
8679 	mutex_lock(&trace_types_lock);
8680 
8681 	ret = -ENODEV;
8682 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8683 		if (tr->name && strcmp(tr->name, name) == 0) {
8684 			ret = __remove_instance(tr);
8685 			break;
8686 		}
8687 	}
8688 
8689 	mutex_unlock(&trace_types_lock);
8690 	mutex_unlock(&event_mutex);
8691 
8692 	return ret;
8693 }
8694 
8695 static __init void create_trace_instances(struct dentry *d_tracer)
8696 {
8697 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8698 							 instance_mkdir,
8699 							 instance_rmdir);
8700 	if (WARN_ON(!trace_instance_dir))
8701 		return;
8702 }
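/*
 * Usage sketch for the "instances" directory created above (the mount
 * point is assumed; any directory name works):
 *
 *	mkdir /sys/kernel/tracing/instances/foo		# -> instance_mkdir()
 *	rmdir /sys/kernel/tracing/instances/foo		# -> instance_rmdir()
 *
 * Each mkdir ends up in trace_array_create(), giving the new instance
 * its own ring buffer and its own copies of most of the files set up
 * by init_tracer_tracefs() below.
 */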
8703 
8704 static void
8705 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8706 {
8707 	struct trace_event_file *file;
8708 	int cpu;
8709 
8710 	trace_create_file("available_tracers", 0444, d_tracer,
8711 			tr, &show_traces_fops);
8712 
8713 	trace_create_file("current_tracer", 0644, d_tracer,
8714 			tr, &set_tracer_fops);
8715 
8716 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8717 			  tr, &tracing_cpumask_fops);
8718 
8719 	trace_create_file("trace_options", 0644, d_tracer,
8720 			  tr, &tracing_iter_fops);
8721 
8722 	trace_create_file("trace", 0644, d_tracer,
8723 			  tr, &tracing_fops);
8724 
8725 	trace_create_file("trace_pipe", 0444, d_tracer,
8726 			  tr, &tracing_pipe_fops);
8727 
8728 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8729 			  tr, &tracing_entries_fops);
8730 
8731 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8732 			  tr, &tracing_total_entries_fops);
8733 
8734 	trace_create_file("free_buffer", 0200, d_tracer,
8735 			  tr, &tracing_free_buffer_fops);
8736 
8737 	trace_create_file("trace_marker", 0220, d_tracer,
8738 			  tr, &tracing_mark_fops);
8739 
8740 	file = __find_event_file(tr, "ftrace", "print");
8741 	if (file && file->dir)
8742 		trace_create_file("trigger", 0644, file->dir, file,
8743 				  &event_trigger_fops);
8744 	tr->trace_marker_file = file;
8745 
8746 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8747 			  tr, &tracing_mark_raw_fops);
8748 
8749 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8750 			  &trace_clock_fops);
8751 
8752 	trace_create_file("tracing_on", 0644, d_tracer,
8753 			  tr, &rb_simple_fops);
8754 
8755 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8756 			  &trace_time_stamp_mode_fops);
8757 
8758 	tr->buffer_percent = 50;
8759 
8760 	trace_create_file("buffer_percent", 0444, d_tracer,
8761 			tr, &buffer_percent_fops);
8762 
8763 	create_trace_options_dir(tr);
8764 
8765 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8766 	trace_create_file("tracing_max_latency", 0644, d_tracer,
8767 			&tr->max_latency, &tracing_max_lat_fops);
8768 #endif
8769 
8770 	if (ftrace_create_function_files(tr, d_tracer))
8771 		WARN(1, "Could not allocate function filter files");
8772 
8773 #ifdef CONFIG_TRACER_SNAPSHOT
8774 	trace_create_file("snapshot", 0644, d_tracer,
8775 			  tr, &snapshot_fops);
8776 #endif
8777 
8778 	trace_create_file("error_log", 0644, d_tracer,
8779 			  tr, &tracing_err_log_fops);
8780 
8781 	for_each_tracing_cpu(cpu)
8782 		tracing_init_tracefs_percpu(tr, cpu);
8783 
8784 	ftrace_init_tracefs(tr, d_tracer);
8785 }
8786 
8787 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8788 {
8789 	struct vfsmount *mnt;
8790 	struct file_system_type *type;
8791 
8792 	/*
8793 	 * To maintain backward compatibility for tools that mount
8794 	 * debugfs to get to the tracing facility, tracefs is automatically
8795 	 * mounted to the debugfs/tracing directory.
8796 	 */
8797 	type = get_fs_type("tracefs");
8798 	if (!type)
8799 		return NULL;
8800 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8801 	put_filesystem(type);
8802 	if (IS_ERR(mnt))
8803 		return NULL;
8804 	mntget(mnt);
8805 
8806 	return mnt;
8807 }
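/*
 * With the automount above, both of these paths reach the same tracefs
 * files (the locations shown are the conventional ones and may differ
 * by configuration):
 *
 *	/sys/kernel/tracing/trace
 *	/sys/kernel/debug/tracing/trace
 *
 * The second path is served by tracefs automounted on the debugfs
 * "tracing" directory, so older tools keep working unchanged.
 */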
8808 
8809 /**
8810  * tracing_init_dentry - initialize top level trace array
8811  *
8812  * This is called when creating files or directories in the tracing
8813  * directory. It is called via fs_initcall() by any of the boot up code
8814  * and expects to return the dentry of the top level tracing directory.
8815  */
8816 struct dentry *tracing_init_dentry(void)
8817 {
8818 	struct trace_array *tr = &global_trace;
8819 
8820 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
8821 		pr_warning("Tracing disabled due to lockdown\n");
8822 		return ERR_PTR(-EPERM);
8823 	}
8824 
8825 	/* The top level trace array uses NULL as parent */
8826 	if (tr->dir)
8827 		return NULL;
8828 
8829 	if (WARN_ON(!tracefs_initialized()) ||
8830 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8831 		 WARN_ON(!debugfs_initialized())))
8832 		return ERR_PTR(-ENODEV);
8833 
8834 	/*
8835 	 * As there may still be users that expect the tracing
8836 	 * files to exist in debugfs/tracing, we must automount
8837 	 * the tracefs file system there, so older tools still
8838 	 * work with the newer kernel.
8839 	 */
8840 	tr->dir = debugfs_create_automount("tracing", NULL,
8841 					   trace_automount, NULL);
8842 
8843 	return NULL;
8844 }
8845 
8846 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8847 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8848 
8849 static void __init trace_eval_init(void)
8850 {
8851 	int len;
8852 
8853 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8854 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8855 }
8856 
8857 #ifdef CONFIG_MODULES
8858 static void trace_module_add_evals(struct module *mod)
8859 {
8860 	if (!mod->num_trace_evals)
8861 		return;
8862 
8863 	/*
8864 	 * Modules with bad taint do not have events created, do
8865 	 * not bother with enums either.
8866 	 */
8867 	if (trace_module_has_bad_taint(mod))
8868 		return;
8869 
8870 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8871 }
8872 
8873 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8874 static void trace_module_remove_evals(struct module *mod)
8875 {
8876 	union trace_eval_map_item *map;
8877 	union trace_eval_map_item **last = &trace_eval_maps;
8878 
8879 	if (!mod->num_trace_evals)
8880 		return;
8881 
8882 	mutex_lock(&trace_eval_mutex);
8883 
8884 	map = trace_eval_maps;
8885 
8886 	while (map) {
8887 		if (map->head.mod == mod)
8888 			break;
8889 		map = trace_eval_jmp_to_tail(map);
8890 		last = &map->tail.next;
8891 		map = map->tail.next;
8892 	}
8893 	if (!map)
8894 		goto out;
8895 
8896 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8897 	kfree(map);
8898  out:
8899 	mutex_unlock(&trace_eval_mutex);
8900 }
8901 #else
8902 static inline void trace_module_remove_evals(struct module *mod) { }
8903 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8904 
8905 static int trace_module_notify(struct notifier_block *self,
8906 			       unsigned long val, void *data)
8907 {
8908 	struct module *mod = data;
8909 
8910 	switch (val) {
8911 	case MODULE_STATE_COMING:
8912 		trace_module_add_evals(mod);
8913 		break;
8914 	case MODULE_STATE_GOING:
8915 		trace_module_remove_evals(mod);
8916 		break;
8917 	}
8918 
8919 	return 0;
8920 }
8921 
8922 static struct notifier_block trace_module_nb = {
8923 	.notifier_call = trace_module_notify,
8924 	.priority = 0,
8925 };
8926 #endif /* CONFIG_MODULES */
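
/*
 * Illustrative module-side sketch (assumed usage, not defined in this
 * file): a module's trace header can declare value-to-name mappings
 * with TRACE_DEFINE_ENUM(), which populate mod->trace_evals.  The
 * notifier above then inserts those maps on MODULE_STATE_COMING and
 * removes them on MODULE_STATE_GOING.
 *
 *	enum { EXAMPLE_ON = 1, EXAMPLE_OFF = 2 };
 *
 *	TRACE_DEFINE_ENUM(EXAMPLE_ON);
 *	TRACE_DEFINE_ENUM(EXAMPLE_OFF);
 */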
8927 
8928 static __init int tracer_init_tracefs(void)
8929 {
8930 	struct dentry *d_tracer;
8931 
8932 	trace_access_lock_init();
8933 
8934 	d_tracer = tracing_init_dentry();
8935 	if (IS_ERR(d_tracer))
8936 		return 0;
8937 
8938 	event_trace_init();
8939 
8940 	init_tracer_tracefs(&global_trace, d_tracer);
8941 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8942 
8943 	trace_create_file("tracing_thresh", 0644, d_tracer,
8944 			&global_trace, &tracing_thresh_fops);
8945 
8946 	trace_create_file("README", 0444, d_tracer,
8947 			NULL, &tracing_readme_fops);
8948 
8949 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8950 			NULL, &tracing_saved_cmdlines_fops);
8951 
8952 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8953 			  NULL, &tracing_saved_cmdlines_size_fops);
8954 
8955 	trace_create_file("saved_tgids", 0444, d_tracer,
8956 			NULL, &tracing_saved_tgids_fops);
8957 
8958 	trace_eval_init();
8959 
8960 	trace_create_eval_file(d_tracer);
8961 
8962 #ifdef CONFIG_MODULES
8963 	register_module_notifier(&trace_module_nb);
8964 #endif
8965 
8966 #ifdef CONFIG_DYNAMIC_FTRACE
8967 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8968 			NULL, &tracing_dyn_info_fops);
8969 #endif
8970 
8971 	create_trace_instances(d_tracer);
8972 
8973 	update_tracer_options(&global_trace);
8974 
8975 	return 0;
8976 }
8977 
8978 static int trace_panic_handler(struct notifier_block *this,
8979 			       unsigned long event, void *unused)
8980 {
8981 	if (ftrace_dump_on_oops)
8982 		ftrace_dump(ftrace_dump_on_oops);
8983 	return NOTIFY_OK;
8984 }
8985 
8986 static struct notifier_block trace_panic_notifier = {
8987 	.notifier_call  = trace_panic_handler,
8988 	.next           = NULL,
8989 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8990 };
8991 
8992 static int trace_die_handler(struct notifier_block *self,
8993 			     unsigned long val,
8994 			     void *data)
8995 {
8996 	switch (val) {
8997 	case DIE_OOPS:
8998 		if (ftrace_dump_on_oops)
8999 			ftrace_dump(ftrace_dump_on_oops);
9000 		break;
9001 	default:
9002 		break;
9003 	}
9004 	return NOTIFY_OK;
9005 }
9006 
9007 static struct notifier_block trace_die_notifier = {
9008 	.notifier_call = trace_die_handler,
9009 	.priority = 200
9010 };
9011 
9012 /*
9013  * printk is limited to a maximum of 1024 characters; we really don't
9014  * need it that big. Nothing should be printing 1000 characters anyway.
9015  */
9016 #define TRACE_MAX_PRINT		1000
9017 
9018 /*
9019  * Define here KERN_TRACE so that we have one place to modify
9020  * it if we decide to change what log level the ftrace dump
9021  * should be at.
9022  */
9023 #define KERN_TRACE		KERN_EMERG
9024 
9025 void
9026 trace_printk_seq(struct trace_seq *s)
9027 {
9028 	/* Probably should print a warning here. */
9029 	if (s->seq.len >= TRACE_MAX_PRINT)
9030 		s->seq.len = TRACE_MAX_PRINT;
9031 
9032 	/*
9033 	 * More paranoid code. Although the buffer size is set to
9034 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9035 	 * an extra layer of protection.
9036 	 */
9037 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9038 		s->seq.len = s->seq.size - 1;
9039 
9040 	/* The string should already be NUL-terminated, but we are paranoid. */
9041 	s->buffer[s->seq.len] = 0;
9042 
9043 	printk(KERN_TRACE "%s", s->buffer);
9044 
9045 	trace_seq_init(s);
9046 }
9047 
9048 void trace_init_global_iter(struct trace_iterator *iter)
9049 {
9050 	iter->tr = &global_trace;
9051 	iter->trace = iter->tr->current_trace;
9052 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9053 	iter->trace_buffer = &global_trace.trace_buffer;
9054 
9055 	if (iter->trace && iter->trace->open)
9056 		iter->trace->open(iter);
9057 
9058 	/* Annotate start of buffers if we had overruns */
9059 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
9060 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9061 
9062 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9063 	if (trace_clocks[iter->tr->clock_id].in_ns)
9064 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9065 }
9066 
9067 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9068 {
9069 	/* use static because iter can be a bit big for the stack */
9070 	static struct trace_iterator iter;
9071 	static atomic_t dump_running;
9072 	struct trace_array *tr = &global_trace;
9073 	unsigned int old_userobj;
9074 	unsigned long flags;
9075 	int cnt = 0, cpu;
9076 
9077 	/* Only allow one dump user at a time. */
9078 	if (atomic_inc_return(&dump_running) != 1) {
9079 		atomic_dec(&dump_running);
9080 		return;
9081 	}
9082 
9083 	/*
9084 	 * Always turn off tracing when we dump.
9085 	 * We don't need to show trace output of what happens
9086 	 * between multiple crashes.
9087 	 *
9088 	 * If the user does a sysrq-z, then they can re-enable
9089 	 * tracing with echo 1 > tracing_on.
9090 	 */
9091 	tracing_off();
9092 
9093 	local_irq_save(flags);
9094 	printk_nmi_direct_enter();
9095 
9096 	/* Simulate the iterator */
9097 	trace_init_global_iter(&iter);
9098 
9099 	for_each_tracing_cpu(cpu) {
9100 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
9101 	}
9102 
9103 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9104 
9105 	/* don't look at user memory in panic mode */
9106 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9107 
9108 	switch (oops_dump_mode) {
9109 	case DUMP_ALL:
9110 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9111 		break;
9112 	case DUMP_ORIG:
9113 		iter.cpu_file = raw_smp_processor_id();
9114 		break;
9115 	case DUMP_NONE:
9116 		goto out_enable;
9117 	default:
9118 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9119 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9120 	}
9121 
9122 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9123 
9124 	/* Did function tracer already get disabled? */
9125 	if (ftrace_is_dead()) {
9126 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9127 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9128 	}
9129 
9130 	/*
9131 	 * We need to stop all tracing on all CPUs to read
9132 	 * the next buffer. This is a bit expensive, but is
9133 	 * not done often. We print everything we can read,
9134 	 * and then release the locks again.
9135 	 */
9136 
9137 	while (!trace_empty(&iter)) {
9138 
9139 		if (!cnt)
9140 			printk(KERN_TRACE "---------------------------------\n");
9141 
9142 		cnt++;
9143 
9144 		trace_iterator_reset(&iter);
9145 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9146 
9147 		if (trace_find_next_entry_inc(&iter) != NULL) {
9148 			int ret;
9149 
9150 			ret = print_trace_line(&iter);
9151 			if (ret != TRACE_TYPE_NO_CONSUME)
9152 				trace_consume(&iter);
9153 		}
9154 		touch_nmi_watchdog();
9155 
9156 		trace_printk_seq(&iter.seq);
9157 	}
9158 
9159 	if (!cnt)
9160 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9161 	else
9162 		printk(KERN_TRACE "---------------------------------\n");
9163 
9164  out_enable:
9165 	tr->trace_flags |= old_userobj;
9166 
9167 	for_each_tracing_cpu(cpu) {
9168 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
9169 	}
9170 	atomic_dec(&dump_running);
9171 	printk_nmi_direct_exit();
9172 	local_irq_restore(flags);
9173 }
9174 EXPORT_SYMBOL_GPL(ftrace_dump);
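
/*
 * Usage sketch (illustrative only): callers pick how much to dump,
 * just as the panic and die notifiers above do with ftrace_dump_on_oops.
 *
 *	ftrace_dump(DUMP_ALL);	dumps the buffers of every CPU
 *	ftrace_dump(DUMP_ORIG);	dumps only the calling CPU's buffer
 */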
9175 
9176 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9177 {
9178 	char **argv;
9179 	int argc, ret;
9180 
9181 	argc = 0;
9182 	ret = 0;
9183 	argv = argv_split(GFP_KERNEL, buf, &argc);
9184 	if (!argv)
9185 		return -ENOMEM;
9186 
9187 	if (argc)
9188 		ret = createfn(argc, argv);
9189 
9190 	argv_free(argv);
9191 
9192 	return ret;
9193 }
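
/*
 * Minimal usage sketch (hypothetical callback, for illustration only):
 * trace_run_command() splits the string into an argv[] with argv_split()
 * and hands it to the callback.
 *
 *	static int example_create(int argc, char **argv)
 *	{
 *		if (argc < 1)
 *			return -EINVAL;
 *		pr_info("cmd '%s' with %d argument(s)\n", argv[0], argc - 1);
 *		return 0;
 *	}
 *
 *	ret = trace_run_command("example_cmd arg1 arg2", example_create);
 */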
9194 
9195 #define WRITE_BUFSIZE  4096
9196 
9197 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9198 				size_t count, loff_t *ppos,
9199 				int (*createfn)(int, char **))
9200 {
9201 	char *kbuf, *buf, *tmp;
9202 	int ret = 0;
9203 	size_t done = 0;
9204 	size_t size;
9205 
9206 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9207 	if (!kbuf)
9208 		return -ENOMEM;
9209 
9210 	while (done < count) {
9211 		size = count - done;
9212 
9213 		if (size >= WRITE_BUFSIZE)
9214 			size = WRITE_BUFSIZE - 1;
9215 
9216 		if (copy_from_user(kbuf, buffer + done, size)) {
9217 			ret = -EFAULT;
9218 			goto out;
9219 		}
9220 		kbuf[size] = '\0';
9221 		buf = kbuf;
9222 		do {
9223 			tmp = strchr(buf, '\n');
9224 			if (tmp) {
9225 				*tmp = '\0';
9226 				size = tmp - buf + 1;
9227 			} else {
9228 				size = strlen(buf);
9229 				if (done + size < count) {
9230 					if (buf != kbuf)
9231 						break;
9232 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9233 					pr_warn("Line length is too long: Should be less than %d\n",
9234 						WRITE_BUFSIZE - 2);
9235 					ret = -EINVAL;
9236 					goto out;
9237 				}
9238 			}
9239 			done += size;
9240 
9241 			/* Remove comments */
9242 			tmp = strchr(buf, '#');
9243 
9244 			if (tmp)
9245 				*tmp = '\0';
9246 
9247 			ret = trace_run_command(buf, createfn);
9248 			if (ret)
9249 				goto out;
9250 			buf += size;
9251 
9252 		} while (done < count);
9253 	}
9254 	ret = done;
9255 
9256 out:
9257 	kfree(kbuf);
9258 
9259 	return ret;
9260 }
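
/*
 * Sketch of a typical caller (hypothetical names): a tracefs file's
 * write handler forwards user writes here so that each non-comment
 * line becomes one invocation of the createfn callback.
 *
 *	static ssize_t example_probes_write(struct file *file,
 *					    const char __user *buffer,
 *					    size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buffer, count, ppos,
 *					       example_create);
 *	}
 */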
9261 
9262 __init static int tracer_alloc_buffers(void)
9263 {
9264 	int ring_buf_size;
9265 	int ret = -ENOMEM;
9266 
9267 
9268 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9269 		pr_warning("Tracing disabled due to lockdown\n");
9270 		return -EPERM;
9271 	}
9272 
9273 	/*
9274 	 * Make sure we don't accidentally add more trace options
9275 	 * than we have bits for.
9276 	 */
9277 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9278 
9279 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9280 		goto out;
9281 
9282 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9283 		goto out_free_buffer_mask;
9284 
9285 	/* Only allocate trace_printk buffers if a trace_printk exists */
9286 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9287 		/* Must be called before global_trace.buffer is allocated */
9288 		trace_printk_init_buffers();
9289 
9290 	/* To save memory, keep the ring buffer size to its minimum */
9291 	if (ring_buffer_expanded)
9292 		ring_buf_size = trace_buf_size;
9293 	else
9294 		ring_buf_size = 1;
9295 
9296 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9297 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9298 
9299 	raw_spin_lock_init(&global_trace.start_lock);
9300 
9301 	/*
9302 	 * The prepare callback allocates some memory for the ring buffer. We
9303 	 * don't free the buffer if the CPU goes down. If we were to free
9304 	 * the buffer, then the user would lose any trace that was in the
9305 	 * buffer. The memory will be removed once the "instance" is removed.
9306 	 */
9307 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9308 				      "trace/RB:prepare", trace_rb_cpu_prepare,
9309 				      NULL);
9310 	if (ret < 0)
9311 		goto out_free_cpumask;
9312 	/* Used for event triggers */
9313 	ret = -ENOMEM;
9314 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9315 	if (!temp_buffer)
9316 		goto out_rm_hp_state;
9317 
9318 	if (trace_create_savedcmd() < 0)
9319 		goto out_free_temp_buffer;
9320 
9321 	/* TODO: make the number of buffers hot pluggable with CPUs */
9322 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9323 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
9324 		WARN_ON(1);
9325 		goto out_free_savedcmd;
9326 	}
9327 
9328 	if (global_trace.buffer_disabled)
9329 		tracing_off();
9330 
9331 	if (trace_boot_clock) {
9332 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
9333 		if (ret < 0)
9334 			pr_warn("Trace clock %s not defined, going back to default\n",
9335 				trace_boot_clock);
9336 	}
9337 
9338 	/*
9339 	 * register_tracer() might reference current_trace, so it
9340 	 * needs to be set before we register anything. This is
9341 	 * just a bootstrap of current_trace anyway.
9342 	 */
9343 	global_trace.current_trace = &nop_trace;
9344 
9345 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9346 
9347 	ftrace_init_global_array_ops(&global_trace);
9348 
9349 	init_trace_flags_index(&global_trace);
9350 
9351 	register_tracer(&nop_trace);
9352 
9353 	/* Function tracing may start here (via kernel command line) */
9354 	init_function_trace();
9355 
9356 	/* All seems OK, enable tracing */
9357 	tracing_disabled = 0;
9358 
9359 	atomic_notifier_chain_register(&panic_notifier_list,
9360 				       &trace_panic_notifier);
9361 
9362 	register_die_notifier(&trace_die_notifier);
9363 
9364 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9365 
9366 	INIT_LIST_HEAD(&global_trace.systems);
9367 	INIT_LIST_HEAD(&global_trace.events);
9368 	INIT_LIST_HEAD(&global_trace.hist_vars);
9369 	INIT_LIST_HEAD(&global_trace.err_log);
9370 	list_add(&global_trace.list, &ftrace_trace_arrays);
9371 
9372 	apply_trace_boot_options();
9373 
9374 	register_snapshot_cmd();
9375 
9376 	return 0;
9377 
9378 out_free_savedcmd:
9379 	free_saved_cmdlines_buffer(savedcmd);
9380 out_free_temp_buffer:
9381 	ring_buffer_free(temp_buffer);
9382 out_rm_hp_state:
9383 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9384 out_free_cpumask:
9385 	free_cpumask_var(global_trace.tracing_cpumask);
9386 out_free_buffer_mask:
9387 	free_cpumask_var(tracing_buffer_mask);
9388 out:
9389 	return ret;
9390 }
9391 
9392 void __init early_trace_init(void)
9393 {
9394 	if (tracepoint_printk) {
9395 		tracepoint_print_iter =
9396 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9397 		if (WARN_ON(!tracepoint_print_iter))
9398 			tracepoint_printk = 0;
9399 		else
9400 			static_key_enable(&tracepoint_printk_key.key);
9401 	}
9402 	tracer_alloc_buffers();
9403 
9404 	init_events();
9405 }
9406 
9407 void __init trace_init(void)
9408 {
9409 	trace_event_init();
9410 }
9411 
9412 __init static int clear_boot_tracer(void)
9413 {
9414 	/*
9415 	 * The default bootup tracer name points into a boot buffer that
9416 	 * lives in an init section. This function is called at late_initcall
9417 	 * time. If the boot tracer was never found, clear the pointer out,
9418 	 * to prevent later registration from accessing the buffer that is
9419 	 * about to be freed.
9420 	 */
9421 	if (!default_bootup_tracer)
9422 		return 0;
9423 
9424 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9425 	       default_bootup_tracer);
9426 	default_bootup_tracer = NULL;
9427 
9428 	return 0;
9429 }
9430 
9431 fs_initcall(tracer_init_tracefs);
9432 late_initcall_sync(clear_boot_tracer);
9433 
9434 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9435 __init static int tracing_set_default_clock(void)
9436 {
9437 	/* sched_clock_stable() is determined in late_initcall */
9438 	if (!trace_boot_clock && !sched_clock_stable()) {
9439 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
9440 			pr_warn("Can not set tracing clock due to lockdown\n");
9441 			return -EPERM;
9442 		}
9443 
9444 		printk(KERN_WARNING
9445 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9446 		       "If you want to keep using the local clock, then add:\n"
9447 		       "  \"trace_clock=local\"\n"
9448 		       "on the kernel command line\n");
9449 		tracing_set_clock(&global_trace, "global");
9450 	}
9451 
9452 	return 0;
9453 }
9454 late_initcall_sync(tracing_set_default_clock);
9455 #endif
9456