• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 
49 #include "trace.h"
50 #include "trace_output.h"
51 
52 /*
53  * On boot up, the ring buffer is set to the minimum size, so that
54  * we do not waste memory on systems that are not using tracing.
55  */
56 bool ring_buffer_expanded;
57 
58 /*
59  * We need to change this state when a selftest is running.
60  * A selftest will lurk into the ring-buffer to count the
61  * entries inserted during the selftest although some concurrent
62  * insertions into the ring-buffer such as trace_printk could occurred
63  * at the same time, giving false positive or negative results.
64  */
65 static bool __read_mostly tracing_selftest_running;
66 
67 /*
68  * If a tracer is running, we do not want to run SELFTEST.
69  */
70 bool __read_mostly tracing_selftest_disabled;
71 
72 /* Pipe tracepoints to printk */
73 struct trace_iterator *tracepoint_print_iter;
74 int tracepoint_printk;
75 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
76 
77 /* For tracers that don't implement custom flags */
78 static struct tracer_opt dummy_tracer_opt[] = {
79 	{ }
80 };
81 
82 static int
dummy_set_flag(struct trace_array * tr,u32 old_flags,u32 bit,int set)83 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
84 {
85 	return 0;
86 }
87 
88 /*
89  * To prevent the comm cache from being overwritten when no
90  * tracing is active, only save the comm when a trace event
91  * occurred.
92  */
93 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
94 
95 /*
96  * Kill all tracing for good (never come back).
97  * It is initialized to 1 but will turn to zero if the initialization
98  * of the tracer is successful. But that is the only place that sets
99  * this back to zero.
100  */
101 static int tracing_disabled = 1;
102 
103 cpumask_var_t __read_mostly	tracing_buffer_mask;
104 
105 /*
106  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
107  *
108  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
109  * is set, then ftrace_dump is called. This will output the contents
110  * of the ftrace buffers to the console.  This is very useful for
111  * capturing traces that lead to crashes and outputing it to a
112  * serial console.
113  *
114  * It is default off, but you can enable it with either specifying
115  * "ftrace_dump_on_oops" in the kernel command line, or setting
116  * /proc/sys/kernel/ftrace_dump_on_oops
117  * Set 1 if you want to dump buffers of all CPUs
118  * Set 2 if you want to dump the buffer of the CPU that triggered oops
119  */
120 
121 enum ftrace_dump_mode ftrace_dump_on_oops;
122 
123 /* When set, tracing will stop when a WARN*() is hit */
124 int __disable_trace_on_warning;
125 
126 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
127 /* Map of enums to their values, for "eval_map" file */
128 struct trace_eval_map_head {
129 	struct module			*mod;
130 	unsigned long			length;
131 };
132 
133 union trace_eval_map_item;
134 
135 struct trace_eval_map_tail {
136 	/*
137 	 * "end" is first and points to NULL as it must be different
138 	 * than "mod" or "eval_string"
139 	 */
140 	union trace_eval_map_item	*next;
141 	const char			*end;	/* points to NULL */
142 };
143 
144 static DEFINE_MUTEX(trace_eval_mutex);
145 
146 /*
147  * The trace_eval_maps are saved in an array with two extra elements,
148  * one at the beginning, and one at the end. The beginning item contains
149  * the count of the saved maps (head.length), and the module they
150  * belong to if not built in (head.mod). The ending item contains a
151  * pointer to the next array of saved eval_map items.
152  */
153 union trace_eval_map_item {
154 	struct trace_eval_map		map;
155 	struct trace_eval_map_head	head;
156 	struct trace_eval_map_tail	tail;
157 };
158 
159 static union trace_eval_map_item *trace_eval_maps;
160 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
161 
162 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
163 static void ftrace_trace_userstack(struct ring_buffer *buffer,
164 				   unsigned long flags, int pc);
165 
166 #define MAX_TRACER_SIZE		100
167 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
168 static char *default_bootup_tracer;
169 
170 static bool allocate_snapshot;
171 
set_cmdline_ftrace(char * str)172 static int __init set_cmdline_ftrace(char *str)
173 {
174 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
175 	default_bootup_tracer = bootup_tracer_buf;
176 	/* We are using ftrace early, expand it */
177 	ring_buffer_expanded = true;
178 	return 1;
179 }
180 __setup("ftrace=", set_cmdline_ftrace);
181 
set_ftrace_dump_on_oops(char * str)182 static int __init set_ftrace_dump_on_oops(char *str)
183 {
184 	if (*str++ != '=' || !*str) {
185 		ftrace_dump_on_oops = DUMP_ALL;
186 		return 1;
187 	}
188 
189 	if (!strcmp("orig_cpu", str)) {
190 		ftrace_dump_on_oops = DUMP_ORIG;
191                 return 1;
192         }
193 
194         return 0;
195 }
196 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
197 
stop_trace_on_warning(char * str)198 static int __init stop_trace_on_warning(char *str)
199 {
200 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
201 		__disable_trace_on_warning = 1;
202 	return 1;
203 }
204 __setup("traceoff_on_warning", stop_trace_on_warning);
205 
boot_alloc_snapshot(char * str)206 static int __init boot_alloc_snapshot(char *str)
207 {
208 	allocate_snapshot = true;
209 	/* We also need the main ring buffer expanded */
210 	ring_buffer_expanded = true;
211 	return 1;
212 }
213 __setup("alloc_snapshot", boot_alloc_snapshot);
214 
215 
216 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
217 
set_trace_boot_options(char * str)218 static int __init set_trace_boot_options(char *str)
219 {
220 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
221 	return 0;
222 }
223 __setup("trace_options=", set_trace_boot_options);
224 
225 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
226 static char *trace_boot_clock __initdata;
227 
set_trace_boot_clock(char * str)228 static int __init set_trace_boot_clock(char *str)
229 {
230 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
231 	trace_boot_clock = trace_boot_clock_buf;
232 	return 0;
233 }
234 __setup("trace_clock=", set_trace_boot_clock);
235 
set_tracepoint_printk(char * str)236 static int __init set_tracepoint_printk(char *str)
237 {
238 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
239 		tracepoint_printk = 1;
240 	return 1;
241 }
242 __setup("tp_printk", set_tracepoint_printk);
243 
ns2usecs(u64 nsec)244 unsigned long long ns2usecs(u64 nsec)
245 {
246 	nsec += 500;
247 	do_div(nsec, 1000);
248 	return nsec;
249 }
250 
251 /* trace_flags holds trace_options default values */
252 #define TRACE_DEFAULT_FLAGS						\
253 	(FUNCTION_DEFAULT_FLAGS |					\
254 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
255 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
256 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
257 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
258 
259 /* trace_options that are only supported by global_trace */
260 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
261 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
262 
263 /* trace_flags that are default zero for instances */
264 #define ZEROED_TRACE_FLAGS \
265 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
266 
267 /*
268  * The global_trace is the descriptor that holds the top-level tracing
269  * buffers for the live tracing.
270  */
271 static struct trace_array global_trace = {
272 	.trace_flags = TRACE_DEFAULT_FLAGS,
273 };
274 
275 LIST_HEAD(ftrace_trace_arrays);
276 
trace_array_get(struct trace_array * this_tr)277 int trace_array_get(struct trace_array *this_tr)
278 {
279 	struct trace_array *tr;
280 	int ret = -ENODEV;
281 
282 	mutex_lock(&trace_types_lock);
283 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
284 		if (tr == this_tr) {
285 			tr->ref++;
286 			ret = 0;
287 			break;
288 		}
289 	}
290 	mutex_unlock(&trace_types_lock);
291 
292 	return ret;
293 }
294 
__trace_array_put(struct trace_array * this_tr)295 static void __trace_array_put(struct trace_array *this_tr)
296 {
297 	WARN_ON(!this_tr->ref);
298 	this_tr->ref--;
299 }
300 
trace_array_put(struct trace_array * this_tr)301 void trace_array_put(struct trace_array *this_tr)
302 {
303 	mutex_lock(&trace_types_lock);
304 	__trace_array_put(this_tr);
305 	mutex_unlock(&trace_types_lock);
306 }
307 
tracing_check_open_get_tr(struct trace_array * tr)308 int tracing_check_open_get_tr(struct trace_array *tr)
309 {
310 	int ret;
311 
312 	ret = security_locked_down(LOCKDOWN_TRACEFS);
313 	if (ret)
314 		return ret;
315 
316 	if (tracing_disabled)
317 		return -ENODEV;
318 
319 	if (tr && trace_array_get(tr) < 0)
320 		return -ENODEV;
321 
322 	return 0;
323 }
324 
call_filter_check_discard(struct trace_event_call * call,void * rec,struct ring_buffer * buffer,struct ring_buffer_event * event)325 int call_filter_check_discard(struct trace_event_call *call, void *rec,
326 			      struct ring_buffer *buffer,
327 			      struct ring_buffer_event *event)
328 {
329 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
330 	    !filter_match_preds(call->filter, rec)) {
331 		__trace_event_discard_commit(buffer, event);
332 		return 1;
333 	}
334 
335 	return 0;
336 }
337 
trace_free_pid_list(struct trace_pid_list * pid_list)338 void trace_free_pid_list(struct trace_pid_list *pid_list)
339 {
340 	vfree(pid_list->pids);
341 	kfree(pid_list);
342 }
343 
344 /**
345  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
346  * @filtered_pids: The list of pids to check
347  * @search_pid: The PID to find in @filtered_pids
348  *
349  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
350  */
351 bool
trace_find_filtered_pid(struct trace_pid_list * filtered_pids,pid_t search_pid)352 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
353 {
354 	/*
355 	 * If pid_max changed after filtered_pids was created, we
356 	 * by default ignore all pids greater than the previous pid_max.
357 	 */
358 	if (search_pid >= filtered_pids->pid_max)
359 		return false;
360 
361 	return test_bit(search_pid, filtered_pids->pids);
362 }
363 
364 /**
365  * trace_ignore_this_task - should a task be ignored for tracing
366  * @filtered_pids: The list of pids to check
367  * @task: The task that should be ignored if not filtered
368  *
369  * Checks if @task should be traced or not from @filtered_pids.
370  * Returns true if @task should *NOT* be traced.
371  * Returns false if @task should be traced.
372  */
373 bool
trace_ignore_this_task(struct trace_pid_list * filtered_pids,struct task_struct * task)374 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
375 {
376 	/*
377 	 * Return false, because if filtered_pids does not exist,
378 	 * all pids are good to trace.
379 	 */
380 	if (!filtered_pids)
381 		return false;
382 
383 	return !trace_find_filtered_pid(filtered_pids, task->pid);
384 }
385 
386 /**
387  * trace_filter_add_remove_task - Add or remove a task from a pid_list
388  * @pid_list: The list to modify
389  * @self: The current task for fork or NULL for exit
390  * @task: The task to add or remove
391  *
392  * If adding a task, if @self is defined, the task is only added if @self
393  * is also included in @pid_list. This happens on fork and tasks should
394  * only be added when the parent is listed. If @self is NULL, then the
395  * @task pid will be removed from the list, which would happen on exit
396  * of a task.
397  */
trace_filter_add_remove_task(struct trace_pid_list * pid_list,struct task_struct * self,struct task_struct * task)398 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
399 				  struct task_struct *self,
400 				  struct task_struct *task)
401 {
402 	if (!pid_list)
403 		return;
404 
405 	/* For forks, we only add if the forking task is listed */
406 	if (self) {
407 		if (!trace_find_filtered_pid(pid_list, self->pid))
408 			return;
409 	}
410 
411 	/* Sorry, but we don't support pid_max changing after setting */
412 	if (task->pid >= pid_list->pid_max)
413 		return;
414 
415 	/* "self" is set for forks, and NULL for exits */
416 	if (self)
417 		set_bit(task->pid, pid_list->pids);
418 	else
419 		clear_bit(task->pid, pid_list->pids);
420 }
421 
422 /**
423  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
424  * @pid_list: The pid list to show
425  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
426  * @pos: The position of the file
427  *
428  * This is used by the seq_file "next" operation to iterate the pids
429  * listed in a trace_pid_list structure.
430  *
431  * Returns the pid+1 as we want to display pid of zero, but NULL would
432  * stop the iteration.
433  */
trace_pid_next(struct trace_pid_list * pid_list,void * v,loff_t * pos)434 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
435 {
436 	unsigned long pid = (unsigned long)v;
437 
438 	(*pos)++;
439 
440 	/* pid already is +1 of the actual prevous bit */
441 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
442 
443 	/* Return pid + 1 to allow zero to be represented */
444 	if (pid < pid_list->pid_max)
445 		return (void *)(pid + 1);
446 
447 	return NULL;
448 }
449 
450 /**
451  * trace_pid_start - Used for seq_file to start reading pid lists
452  * @pid_list: The pid list to show
453  * @pos: The position of the file
454  *
455  * This is used by seq_file "start" operation to start the iteration
456  * of listing pids.
457  *
458  * Returns the pid+1 as we want to display pid of zero, but NULL would
459  * stop the iteration.
460  */
trace_pid_start(struct trace_pid_list * pid_list,loff_t * pos)461 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
462 {
463 	unsigned long pid;
464 	loff_t l = 0;
465 
466 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
467 	if (pid >= pid_list->pid_max)
468 		return NULL;
469 
470 	/* Return pid + 1 so that zero can be the exit value */
471 	for (pid++; pid && l < *pos;
472 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
473 		;
474 	return (void *)pid;
475 }
476 
477 /**
478  * trace_pid_show - show the current pid in seq_file processing
479  * @m: The seq_file structure to write into
480  * @v: A void pointer of the pid (+1) value to display
481  *
482  * Can be directly used by seq_file operations to display the current
483  * pid value.
484  */
trace_pid_show(struct seq_file * m,void * v)485 int trace_pid_show(struct seq_file *m, void *v)
486 {
487 	unsigned long pid = (unsigned long)v - 1;
488 
489 	seq_printf(m, "%lu\n", pid);
490 	return 0;
491 }
492 
493 /* 128 should be much more than enough */
494 #define PID_BUF_SIZE		127
495 
trace_pid_write(struct trace_pid_list * filtered_pids,struct trace_pid_list ** new_pid_list,const char __user * ubuf,size_t cnt)496 int trace_pid_write(struct trace_pid_list *filtered_pids,
497 		    struct trace_pid_list **new_pid_list,
498 		    const char __user *ubuf, size_t cnt)
499 {
500 	struct trace_pid_list *pid_list;
501 	struct trace_parser parser;
502 	unsigned long val;
503 	int nr_pids = 0;
504 	ssize_t read = 0;
505 	ssize_t ret = 0;
506 	loff_t pos;
507 	pid_t pid;
508 
509 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
510 		return -ENOMEM;
511 
512 	/*
513 	 * Always recreate a new array. The write is an all or nothing
514 	 * operation. Always create a new array when adding new pids by
515 	 * the user. If the operation fails, then the current list is
516 	 * not modified.
517 	 */
518 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
519 	if (!pid_list) {
520 		trace_parser_put(&parser);
521 		return -ENOMEM;
522 	}
523 
524 	pid_list->pid_max = READ_ONCE(pid_max);
525 
526 	/* Only truncating will shrink pid_max */
527 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
528 		pid_list->pid_max = filtered_pids->pid_max;
529 
530 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
531 	if (!pid_list->pids) {
532 		trace_parser_put(&parser);
533 		kfree(pid_list);
534 		return -ENOMEM;
535 	}
536 
537 	if (filtered_pids) {
538 		/* copy the current bits to the new max */
539 		for_each_set_bit(pid, filtered_pids->pids,
540 				 filtered_pids->pid_max) {
541 			set_bit(pid, pid_list->pids);
542 			nr_pids++;
543 		}
544 	}
545 
546 	while (cnt > 0) {
547 
548 		pos = 0;
549 
550 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
551 		if (ret < 0 || !trace_parser_loaded(&parser))
552 			break;
553 
554 		read += ret;
555 		ubuf += ret;
556 		cnt -= ret;
557 
558 		ret = -EINVAL;
559 		if (kstrtoul(parser.buffer, 0, &val))
560 			break;
561 		if (val >= pid_list->pid_max)
562 			break;
563 
564 		pid = (pid_t)val;
565 
566 		set_bit(pid, pid_list->pids);
567 		nr_pids++;
568 
569 		trace_parser_clear(&parser);
570 		ret = 0;
571 	}
572 	trace_parser_put(&parser);
573 
574 	if (ret < 0) {
575 		trace_free_pid_list(pid_list);
576 		return ret;
577 	}
578 
579 	if (!nr_pids) {
580 		/* Cleared the list of pids */
581 		trace_free_pid_list(pid_list);
582 		read = ret;
583 		pid_list = NULL;
584 	}
585 
586 	*new_pid_list = pid_list;
587 
588 	return read;
589 }
590 
buffer_ftrace_now(struct trace_buffer * buf,int cpu)591 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
592 {
593 	u64 ts;
594 
595 	/* Early boot up does not have a buffer yet */
596 	if (!buf->buffer)
597 		return trace_clock_local();
598 
599 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
600 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
601 
602 	return ts;
603 }
604 
ftrace_now(int cpu)605 u64 ftrace_now(int cpu)
606 {
607 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
608 }
609 
610 /**
611  * tracing_is_enabled - Show if global_trace has been disabled
612  *
613  * Shows if the global trace has been enabled or not. It uses the
614  * mirror flag "buffer_disabled" to be used in fast paths such as for
615  * the irqsoff tracer. But it may be inaccurate due to races. If you
616  * need to know the accurate state, use tracing_is_on() which is a little
617  * slower, but accurate.
618  */
tracing_is_enabled(void)619 int tracing_is_enabled(void)
620 {
621 	/*
622 	 * For quick access (irqsoff uses this in fast path), just
623 	 * return the mirror variable of the state of the ring buffer.
624 	 * It's a little racy, but we don't really care.
625 	 */
626 	smp_rmb();
627 	return !global_trace.buffer_disabled;
628 }
629 
630 /*
631  * trace_buf_size is the size in bytes that is allocated
632  * for a buffer. Note, the number of bytes is always rounded
633  * to page size.
634  *
635  * This number is purposely set to a low number of 16384.
636  * If the dump on oops happens, it will be much appreciated
637  * to not have to wait for all that output. Anyway this can be
638  * boot time and run time configurable.
639  */
640 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
641 
642 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
643 
644 /* trace_types holds a link list of available tracers. */
645 static struct tracer		*trace_types __read_mostly;
646 
647 /*
648  * trace_types_lock is used to protect the trace_types list.
649  */
650 DEFINE_MUTEX(trace_types_lock);
651 
652 /*
653  * serialize the access of the ring buffer
654  *
655  * ring buffer serializes readers, but it is low level protection.
656  * The validity of the events (which returns by ring_buffer_peek() ..etc)
657  * are not protected by ring buffer.
658  *
659  * The content of events may become garbage if we allow other process consumes
660  * these events concurrently:
661  *   A) the page of the consumed events may become a normal page
662  *      (not reader page) in ring buffer, and this page will be rewrited
663  *      by events producer.
664  *   B) The page of the consumed events may become a page for splice_read,
665  *      and this page will be returned to system.
666  *
667  * These primitives allow multi process access to different cpu ring buffer
668  * concurrently.
669  *
670  * These primitives don't distinguish read-only and read-consume access.
671  * Multi read-only access are also serialized.
672  */
673 
674 #ifdef CONFIG_SMP
675 static DECLARE_RWSEM(all_cpu_access_lock);
676 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
677 
trace_access_lock(int cpu)678 static inline void trace_access_lock(int cpu)
679 {
680 	if (cpu == RING_BUFFER_ALL_CPUS) {
681 		/* gain it for accessing the whole ring buffer. */
682 		down_write(&all_cpu_access_lock);
683 	} else {
684 		/* gain it for accessing a cpu ring buffer. */
685 
686 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
687 		down_read(&all_cpu_access_lock);
688 
689 		/* Secondly block other access to this @cpu ring buffer. */
690 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
691 	}
692 }
693 
trace_access_unlock(int cpu)694 static inline void trace_access_unlock(int cpu)
695 {
696 	if (cpu == RING_BUFFER_ALL_CPUS) {
697 		up_write(&all_cpu_access_lock);
698 	} else {
699 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
700 		up_read(&all_cpu_access_lock);
701 	}
702 }
703 
trace_access_lock_init(void)704 static inline void trace_access_lock_init(void)
705 {
706 	int cpu;
707 
708 	for_each_possible_cpu(cpu)
709 		mutex_init(&per_cpu(cpu_access_lock, cpu));
710 }
711 
712 #else
713 
714 static DEFINE_MUTEX(access_lock);
715 
trace_access_lock(int cpu)716 static inline void trace_access_lock(int cpu)
717 {
718 	(void)cpu;
719 	mutex_lock(&access_lock);
720 }
721 
trace_access_unlock(int cpu)722 static inline void trace_access_unlock(int cpu)
723 {
724 	(void)cpu;
725 	mutex_unlock(&access_lock);
726 }
727 
trace_access_lock_init(void)728 static inline void trace_access_lock_init(void)
729 {
730 }
731 
732 #endif
733 
734 #ifdef CONFIG_STACKTRACE
735 static void __ftrace_trace_stack(struct ring_buffer *buffer,
736 				 unsigned long flags,
737 				 int skip, int pc, struct pt_regs *regs);
738 static inline void ftrace_trace_stack(struct trace_array *tr,
739 				      struct ring_buffer *buffer,
740 				      unsigned long flags,
741 				      int skip, int pc, struct pt_regs *regs);
742 
743 #else
__ftrace_trace_stack(struct ring_buffer * buffer,unsigned long flags,int skip,int pc,struct pt_regs * regs)744 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
745 					unsigned long flags,
746 					int skip, int pc, struct pt_regs *regs)
747 {
748 }
ftrace_trace_stack(struct trace_array * tr,struct ring_buffer * buffer,unsigned long flags,int skip,int pc,struct pt_regs * regs)749 static inline void ftrace_trace_stack(struct trace_array *tr,
750 				      struct ring_buffer *buffer,
751 				      unsigned long flags,
752 				      int skip, int pc, struct pt_regs *regs)
753 {
754 }
755 
756 #endif
757 
758 static __always_inline void
trace_event_setup(struct ring_buffer_event * event,int type,unsigned long flags,int pc)759 trace_event_setup(struct ring_buffer_event *event,
760 		  int type, unsigned long flags, int pc)
761 {
762 	struct trace_entry *ent = ring_buffer_event_data(event);
763 
764 	tracing_generic_entry_update(ent, type, flags, pc);
765 }
766 
767 static __always_inline struct ring_buffer_event *
__trace_buffer_lock_reserve(struct ring_buffer * buffer,int type,unsigned long len,unsigned long flags,int pc)768 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
769 			  int type,
770 			  unsigned long len,
771 			  unsigned long flags, int pc)
772 {
773 	struct ring_buffer_event *event;
774 
775 	event = ring_buffer_lock_reserve(buffer, len);
776 	if (event != NULL)
777 		trace_event_setup(event, type, flags, pc);
778 
779 	return event;
780 }
781 
tracer_tracing_on(struct trace_array * tr)782 void tracer_tracing_on(struct trace_array *tr)
783 {
784 	if (tr->trace_buffer.buffer)
785 		ring_buffer_record_on(tr->trace_buffer.buffer);
786 	/*
787 	 * This flag is looked at when buffers haven't been allocated
788 	 * yet, or by some tracers (like irqsoff), that just want to
789 	 * know if the ring buffer has been disabled, but it can handle
790 	 * races of where it gets disabled but we still do a record.
791 	 * As the check is in the fast path of the tracers, it is more
792 	 * important to be fast than accurate.
793 	 */
794 	tr->buffer_disabled = 0;
795 	/* Make the flag seen by readers */
796 	smp_wmb();
797 }
798 
799 /**
800  * tracing_on - enable tracing buffers
801  *
802  * This function enables tracing buffers that may have been
803  * disabled with tracing_off.
804  */
tracing_on(void)805 void tracing_on(void)
806 {
807 	tracer_tracing_on(&global_trace);
808 }
809 EXPORT_SYMBOL_GPL(tracing_on);
810 
811 
812 static __always_inline void
__buffer_unlock_commit(struct ring_buffer * buffer,struct ring_buffer_event * event)813 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
814 {
815 	__this_cpu_write(trace_taskinfo_save, true);
816 
817 	/* If this is the temp buffer, we need to commit fully */
818 	if (this_cpu_read(trace_buffered_event) == event) {
819 		/* Length is in event->array[0] */
820 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
821 		/* Release the temp buffer */
822 		this_cpu_dec(trace_buffered_event_cnt);
823 	} else
824 		ring_buffer_unlock_commit(buffer, event);
825 }
826 
827 /**
828  * __trace_puts - write a constant string into the trace buffer.
829  * @ip:	   The address of the caller
830  * @str:   The constant string to write
831  * @size:  The size of the string.
832  */
__trace_puts(unsigned long ip,const char * str,int size)833 int __trace_puts(unsigned long ip, const char *str, int size)
834 {
835 	struct ring_buffer_event *event;
836 	struct ring_buffer *buffer;
837 	struct print_entry *entry;
838 	unsigned long irq_flags;
839 	int alloc;
840 	int pc;
841 
842 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
843 		return 0;
844 
845 	pc = preempt_count();
846 
847 	if (unlikely(tracing_selftest_running || tracing_disabled))
848 		return 0;
849 
850 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
851 
852 	local_save_flags(irq_flags);
853 	buffer = global_trace.trace_buffer.buffer;
854 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
855 					    irq_flags, pc);
856 	if (!event)
857 		return 0;
858 
859 	entry = ring_buffer_event_data(event);
860 	entry->ip = ip;
861 
862 	memcpy(&entry->buf, str, size);
863 
864 	/* Add a newline if necessary */
865 	if (entry->buf[size - 1] != '\n') {
866 		entry->buf[size] = '\n';
867 		entry->buf[size + 1] = '\0';
868 	} else
869 		entry->buf[size] = '\0';
870 
871 	__buffer_unlock_commit(buffer, event);
872 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
873 
874 	return size;
875 }
876 EXPORT_SYMBOL_GPL(__trace_puts);
877 
878 /**
879  * __trace_bputs - write the pointer to a constant string into trace buffer
880  * @ip:	   The address of the caller
881  * @str:   The constant string to write to the buffer to
882  */
__trace_bputs(unsigned long ip,const char * str)883 int __trace_bputs(unsigned long ip, const char *str)
884 {
885 	struct ring_buffer_event *event;
886 	struct ring_buffer *buffer;
887 	struct bputs_entry *entry;
888 	unsigned long irq_flags;
889 	int size = sizeof(struct bputs_entry);
890 	int pc;
891 
892 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
893 		return 0;
894 
895 	pc = preempt_count();
896 
897 	if (unlikely(tracing_selftest_running || tracing_disabled))
898 		return 0;
899 
900 	local_save_flags(irq_flags);
901 	buffer = global_trace.trace_buffer.buffer;
902 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
903 					    irq_flags, pc);
904 	if (!event)
905 		return 0;
906 
907 	entry = ring_buffer_event_data(event);
908 	entry->ip			= ip;
909 	entry->str			= str;
910 
911 	__buffer_unlock_commit(buffer, event);
912 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
913 
914 	return 1;
915 }
916 EXPORT_SYMBOL_GPL(__trace_bputs);
917 
918 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_instance_cond(struct trace_array * tr,void * cond_data)919 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
920 {
921 	struct tracer *tracer = tr->current_trace;
922 	unsigned long flags;
923 
924 	if (in_nmi()) {
925 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
926 		internal_trace_puts("*** snapshot is being ignored        ***\n");
927 		return;
928 	}
929 
930 	if (!tr->allocated_snapshot) {
931 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
932 		internal_trace_puts("*** stopping trace here!   ***\n");
933 		tracing_off();
934 		return;
935 	}
936 
937 	/* Note, snapshot can not be used when the tracer uses it */
938 	if (tracer->use_max_tr) {
939 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
940 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
941 		return;
942 	}
943 
944 	local_irq_save(flags);
945 	update_max_tr(tr, current, smp_processor_id(), cond_data);
946 	local_irq_restore(flags);
947 }
948 
tracing_snapshot_instance(struct trace_array * tr)949 void tracing_snapshot_instance(struct trace_array *tr)
950 {
951 	tracing_snapshot_instance_cond(tr, NULL);
952 }
953 
954 /**
955  * tracing_snapshot - take a snapshot of the current buffer.
956  *
957  * This causes a swap between the snapshot buffer and the current live
958  * tracing buffer. You can use this to take snapshots of the live
959  * trace when some condition is triggered, but continue to trace.
960  *
961  * Note, make sure to allocate the snapshot with either
962  * a tracing_snapshot_alloc(), or by doing it manually
963  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
964  *
965  * If the snapshot buffer is not allocated, it will stop tracing.
966  * Basically making a permanent snapshot.
967  */
tracing_snapshot(void)968 void tracing_snapshot(void)
969 {
970 	struct trace_array *tr = &global_trace;
971 
972 	tracing_snapshot_instance(tr);
973 }
974 EXPORT_SYMBOL_GPL(tracing_snapshot);
975 
976 /**
977  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
978  * @tr:		The tracing instance to snapshot
979  * @cond_data:	The data to be tested conditionally, and possibly saved
980  *
981  * This is the same as tracing_snapshot() except that the snapshot is
982  * conditional - the snapshot will only happen if the
983  * cond_snapshot.update() implementation receiving the cond_data
984  * returns true, which means that the trace array's cond_snapshot
985  * update() operation used the cond_data to determine whether the
986  * snapshot should be taken, and if it was, presumably saved it along
987  * with the snapshot.
988  */
tracing_snapshot_cond(struct trace_array * tr,void * cond_data)989 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
990 {
991 	tracing_snapshot_instance_cond(tr, cond_data);
992 }
993 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
994 
995 /**
996  * tracing_snapshot_cond_data - get the user data associated with a snapshot
997  * @tr:		The tracing instance
998  *
999  * When the user enables a conditional snapshot using
1000  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1001  * with the snapshot.  This accessor is used to retrieve it.
1002  *
1003  * Should not be called from cond_snapshot.update(), since it takes
1004  * the tr->max_lock lock, which the code calling
1005  * cond_snapshot.update() has already done.
1006  *
1007  * Returns the cond_data associated with the trace array's snapshot.
1008  */
tracing_cond_snapshot_data(struct trace_array * tr)1009 void *tracing_cond_snapshot_data(struct trace_array *tr)
1010 {
1011 	void *cond_data = NULL;
1012 
1013 	arch_spin_lock(&tr->max_lock);
1014 
1015 	if (tr->cond_snapshot)
1016 		cond_data = tr->cond_snapshot->cond_data;
1017 
1018 	arch_spin_unlock(&tr->max_lock);
1019 
1020 	return cond_data;
1021 }
1022 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1023 
1024 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1025 					struct trace_buffer *size_buf, int cpu_id);
1026 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1027 
tracing_alloc_snapshot_instance(struct trace_array * tr)1028 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1029 {
1030 	int ret;
1031 
1032 	if (!tr->allocated_snapshot) {
1033 
1034 		/* allocate spare buffer */
1035 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1036 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1037 		if (ret < 0)
1038 			return ret;
1039 
1040 		tr->allocated_snapshot = true;
1041 	}
1042 
1043 	return 0;
1044 }
1045 
free_snapshot(struct trace_array * tr)1046 static void free_snapshot(struct trace_array *tr)
1047 {
1048 	/*
1049 	 * We don't free the ring buffer. instead, resize it because
1050 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1051 	 * we want preserve it.
1052 	 */
1053 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1054 	set_buffer_entries(&tr->max_buffer, 1);
1055 	tracing_reset_online_cpus(&tr->max_buffer);
1056 	tr->allocated_snapshot = false;
1057 }
1058 
1059 /**
1060  * tracing_alloc_snapshot - allocate snapshot buffer.
1061  *
1062  * This only allocates the snapshot buffer if it isn't already
1063  * allocated - it doesn't also take a snapshot.
1064  *
1065  * This is meant to be used in cases where the snapshot buffer needs
1066  * to be set up for events that can't sleep but need to be able to
1067  * trigger a snapshot.
1068  */
tracing_alloc_snapshot(void)1069 int tracing_alloc_snapshot(void)
1070 {
1071 	struct trace_array *tr = &global_trace;
1072 	int ret;
1073 
1074 	ret = tracing_alloc_snapshot_instance(tr);
1075 	WARN_ON(ret < 0);
1076 
1077 	return ret;
1078 }
1079 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1080 
1081 /**
1082  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1083  *
1084  * This is similar to tracing_snapshot(), but it will allocate the
1085  * snapshot buffer if it isn't already allocated. Use this only
1086  * where it is safe to sleep, as the allocation may sleep.
1087  *
1088  * This causes a swap between the snapshot buffer and the current live
1089  * tracing buffer. You can use this to take snapshots of the live
1090  * trace when some condition is triggered, but continue to trace.
1091  */
tracing_snapshot_alloc(void)1092 void tracing_snapshot_alloc(void)
1093 {
1094 	int ret;
1095 
1096 	ret = tracing_alloc_snapshot();
1097 	if (ret < 0)
1098 		return;
1099 
1100 	tracing_snapshot();
1101 }
1102 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1103 
1104 /**
1105  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1106  * @tr:		The tracing instance
1107  * @cond_data:	User data to associate with the snapshot
1108  * @update:	Implementation of the cond_snapshot update function
1109  *
1110  * Check whether the conditional snapshot for the given instance has
1111  * already been enabled, or if the current tracer is already using a
1112  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1113  * save the cond_data and update function inside.
1114  *
1115  * Returns 0 if successful, error otherwise.
1116  */
tracing_snapshot_cond_enable(struct trace_array * tr,void * cond_data,cond_update_fn_t update)1117 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1118 				 cond_update_fn_t update)
1119 {
1120 	struct cond_snapshot *cond_snapshot;
1121 	int ret = 0;
1122 
1123 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1124 	if (!cond_snapshot)
1125 		return -ENOMEM;
1126 
1127 	cond_snapshot->cond_data = cond_data;
1128 	cond_snapshot->update = update;
1129 
1130 	mutex_lock(&trace_types_lock);
1131 
1132 	ret = tracing_alloc_snapshot_instance(tr);
1133 	if (ret)
1134 		goto fail_unlock;
1135 
1136 	if (tr->current_trace->use_max_tr) {
1137 		ret = -EBUSY;
1138 		goto fail_unlock;
1139 	}
1140 
1141 	/*
1142 	 * The cond_snapshot can only change to NULL without the
1143 	 * trace_types_lock. We don't care if we race with it going
1144 	 * to NULL, but we want to make sure that it's not set to
1145 	 * something other than NULL when we get here, which we can
1146 	 * do safely with only holding the trace_types_lock and not
1147 	 * having to take the max_lock.
1148 	 */
1149 	if (tr->cond_snapshot) {
1150 		ret = -EBUSY;
1151 		goto fail_unlock;
1152 	}
1153 
1154 	arch_spin_lock(&tr->max_lock);
1155 	tr->cond_snapshot = cond_snapshot;
1156 	arch_spin_unlock(&tr->max_lock);
1157 
1158 	mutex_unlock(&trace_types_lock);
1159 
1160 	return ret;
1161 
1162  fail_unlock:
1163 	mutex_unlock(&trace_types_lock);
1164 	kfree(cond_snapshot);
1165 	return ret;
1166 }
1167 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1168 
1169 /**
1170  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1171  * @tr:		The tracing instance
1172  *
1173  * Check whether the conditional snapshot for the given instance is
1174  * enabled; if so, free the cond_snapshot associated with it,
1175  * otherwise return -EINVAL.
1176  *
1177  * Returns 0 if successful, error otherwise.
1178  */
tracing_snapshot_cond_disable(struct trace_array * tr)1179 int tracing_snapshot_cond_disable(struct trace_array *tr)
1180 {
1181 	int ret = 0;
1182 
1183 	arch_spin_lock(&tr->max_lock);
1184 
1185 	if (!tr->cond_snapshot)
1186 		ret = -EINVAL;
1187 	else {
1188 		kfree(tr->cond_snapshot);
1189 		tr->cond_snapshot = NULL;
1190 	}
1191 
1192 	arch_spin_unlock(&tr->max_lock);
1193 
1194 	return ret;
1195 }
1196 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1197 #else
tracing_snapshot(void)1198 void tracing_snapshot(void)
1199 {
1200 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1201 }
1202 EXPORT_SYMBOL_GPL(tracing_snapshot);
tracing_snapshot_cond(struct trace_array * tr,void * cond_data)1203 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1204 {
1205 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
tracing_alloc_snapshot(void)1208 int tracing_alloc_snapshot(void)
1209 {
1210 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1211 	return -ENODEV;
1212 }
1213 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
tracing_snapshot_alloc(void)1214 void tracing_snapshot_alloc(void)
1215 {
1216 	/* Give warning */
1217 	tracing_snapshot();
1218 }
1219 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
tracing_cond_snapshot_data(struct trace_array * tr)1220 void *tracing_cond_snapshot_data(struct trace_array *tr)
1221 {
1222 	return NULL;
1223 }
1224 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
tracing_snapshot_cond_enable(struct trace_array * tr,void * cond_data,cond_update_fn_t update)1225 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1226 {
1227 	return -ENODEV;
1228 }
1229 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
tracing_snapshot_cond_disable(struct trace_array * tr)1230 int tracing_snapshot_cond_disable(struct trace_array *tr)
1231 {
1232 	return false;
1233 }
1234 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1235 #endif /* CONFIG_TRACER_SNAPSHOT */
1236 
tracer_tracing_off(struct trace_array * tr)1237 void tracer_tracing_off(struct trace_array *tr)
1238 {
1239 	if (tr->trace_buffer.buffer)
1240 		ring_buffer_record_off(tr->trace_buffer.buffer);
1241 	/*
1242 	 * This flag is looked at when buffers haven't been allocated
1243 	 * yet, or by some tracers (like irqsoff), that just want to
1244 	 * know if the ring buffer has been disabled, but it can handle
1245 	 * races of where it gets disabled but we still do a record.
1246 	 * As the check is in the fast path of the tracers, it is more
1247 	 * important to be fast than accurate.
1248 	 */
1249 	tr->buffer_disabled = 1;
1250 	/* Make the flag seen by readers */
1251 	smp_wmb();
1252 }
1253 
1254 /**
1255  * tracing_off - turn off tracing buffers
1256  *
1257  * This function stops the tracing buffers from recording data.
1258  * It does not disable any overhead the tracers themselves may
1259  * be causing. This function simply causes all recording to
1260  * the ring buffers to fail.
1261  */
tracing_off(void)1262 void tracing_off(void)
1263 {
1264 	tracer_tracing_off(&global_trace);
1265 }
1266 EXPORT_SYMBOL_GPL(tracing_off);
1267 
disable_trace_on_warning(void)1268 void disable_trace_on_warning(void)
1269 {
1270 	if (__disable_trace_on_warning)
1271 		tracing_off();
1272 }
1273 
1274 /**
1275  * tracer_tracing_is_on - show real state of ring buffer enabled
1276  * @tr : the trace array to know if ring buffer is enabled
1277  *
1278  * Shows real state of the ring buffer if it is enabled or not.
1279  */
tracer_tracing_is_on(struct trace_array * tr)1280 bool tracer_tracing_is_on(struct trace_array *tr)
1281 {
1282 	if (tr->trace_buffer.buffer)
1283 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1284 	return !tr->buffer_disabled;
1285 }
1286 
1287 /**
1288  * tracing_is_on - show state of ring buffers enabled
1289  */
tracing_is_on(void)1290 int tracing_is_on(void)
1291 {
1292 	return tracer_tracing_is_on(&global_trace);
1293 }
1294 EXPORT_SYMBOL_GPL(tracing_is_on);
1295 
set_buf_size(char * str)1296 static int __init set_buf_size(char *str)
1297 {
1298 	unsigned long buf_size;
1299 
1300 	if (!str)
1301 		return 0;
1302 	buf_size = memparse(str, &str);
1303 	/* nr_entries can not be zero */
1304 	if (buf_size == 0)
1305 		return 0;
1306 	trace_buf_size = buf_size;
1307 	return 1;
1308 }
1309 __setup("trace_buf_size=", set_buf_size);
1310 
set_tracing_thresh(char * str)1311 static int __init set_tracing_thresh(char *str)
1312 {
1313 	unsigned long threshold;
1314 	int ret;
1315 
1316 	if (!str)
1317 		return 0;
1318 	ret = kstrtoul(str, 0, &threshold);
1319 	if (ret < 0)
1320 		return 0;
1321 	tracing_thresh = threshold * 1000;
1322 	return 1;
1323 }
1324 __setup("tracing_thresh=", set_tracing_thresh);
1325 
nsecs_to_usecs(unsigned long nsecs)1326 unsigned long nsecs_to_usecs(unsigned long nsecs)
1327 {
1328 	return nsecs / 1000;
1329 }
1330 
1331 /*
1332  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1333  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1334  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1335  * of strings in the order that the evals (enum) were defined.
1336  */
1337 #undef C
1338 #define C(a, b) b
1339 
1340 /* These must match the bit postions in trace_iterator_flags */
1341 static const char *trace_options[] = {
1342 	TRACE_FLAGS
1343 	NULL
1344 };
1345 
1346 static struct {
1347 	u64 (*func)(void);
1348 	const char *name;
1349 	int in_ns;		/* is this clock in nanoseconds? */
1350 } trace_clocks[] = {
1351 	{ trace_clock_local,		"local",	1 },
1352 	{ trace_clock_global,		"global",	1 },
1353 	{ trace_clock_counter,		"counter",	0 },
1354 	{ trace_clock_jiffies,		"uptime",	0 },
1355 	{ trace_clock,			"perf",		1 },
1356 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1357 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1358 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1359 	ARCH_TRACE_CLOCKS
1360 };
1361 
trace_clock_in_ns(struct trace_array * tr)1362 bool trace_clock_in_ns(struct trace_array *tr)
1363 {
1364 	if (trace_clocks[tr->clock_id].in_ns)
1365 		return true;
1366 
1367 	return false;
1368 }
1369 
1370 /*
1371  * trace_parser_get_init - gets the buffer for trace parser
1372  */
trace_parser_get_init(struct trace_parser * parser,int size)1373 int trace_parser_get_init(struct trace_parser *parser, int size)
1374 {
1375 	memset(parser, 0, sizeof(*parser));
1376 
1377 	parser->buffer = kmalloc(size, GFP_KERNEL);
1378 	if (!parser->buffer)
1379 		return 1;
1380 
1381 	parser->size = size;
1382 	return 0;
1383 }
1384 
1385 /*
1386  * trace_parser_put - frees the buffer for trace parser
1387  */
trace_parser_put(struct trace_parser * parser)1388 void trace_parser_put(struct trace_parser *parser)
1389 {
1390 	kfree(parser->buffer);
1391 	parser->buffer = NULL;
1392 }
1393 
1394 /*
1395  * trace_get_user - reads the user input string separated by  space
1396  * (matched by isspace(ch))
1397  *
1398  * For each string found the 'struct trace_parser' is updated,
1399  * and the function returns.
1400  *
1401  * Returns number of bytes read.
1402  *
1403  * See kernel/trace/trace.h for 'struct trace_parser' details.
1404  */
trace_get_user(struct trace_parser * parser,const char __user * ubuf,size_t cnt,loff_t * ppos)1405 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1406 	size_t cnt, loff_t *ppos)
1407 {
1408 	char ch;
1409 	size_t read = 0;
1410 	ssize_t ret;
1411 
1412 	if (!*ppos)
1413 		trace_parser_clear(parser);
1414 
1415 	ret = get_user(ch, ubuf++);
1416 	if (ret)
1417 		goto out;
1418 
1419 	read++;
1420 	cnt--;
1421 
1422 	/*
1423 	 * The parser is not finished with the last write,
1424 	 * continue reading the user input without skipping spaces.
1425 	 */
1426 	if (!parser->cont) {
1427 		/* skip white space */
1428 		while (cnt && isspace(ch)) {
1429 			ret = get_user(ch, ubuf++);
1430 			if (ret)
1431 				goto out;
1432 			read++;
1433 			cnt--;
1434 		}
1435 
1436 		parser->idx = 0;
1437 
1438 		/* only spaces were written */
1439 		if (isspace(ch) || !ch) {
1440 			*ppos += read;
1441 			ret = read;
1442 			goto out;
1443 		}
1444 	}
1445 
1446 	/* read the non-space input */
1447 	while (cnt && !isspace(ch) && ch) {
1448 		if (parser->idx < parser->size - 1)
1449 			parser->buffer[parser->idx++] = ch;
1450 		else {
1451 			ret = -EINVAL;
1452 			goto out;
1453 		}
1454 		ret = get_user(ch, ubuf++);
1455 		if (ret)
1456 			goto out;
1457 		read++;
1458 		cnt--;
1459 	}
1460 
1461 	/* We either got finished input or we have to wait for another call. */
1462 	if (isspace(ch) || !ch) {
1463 		parser->buffer[parser->idx] = 0;
1464 		parser->cont = false;
1465 	} else if (parser->idx < parser->size - 1) {
1466 		parser->cont = true;
1467 		parser->buffer[parser->idx++] = ch;
1468 		/* Make sure the parsed string always terminates with '\0'. */
1469 		parser->buffer[parser->idx] = 0;
1470 	} else {
1471 		ret = -EINVAL;
1472 		goto out;
1473 	}
1474 
1475 	*ppos += read;
1476 	ret = read;
1477 
1478 out:
1479 	return ret;
1480 }
1481 
1482 /* TODO add a seq_buf_to_buffer() */
trace_seq_to_buffer(struct trace_seq * s,void * buf,size_t cnt)1483 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1484 {
1485 	int len;
1486 
1487 	if (trace_seq_used(s) <= s->seq.readpos)
1488 		return -EBUSY;
1489 
1490 	len = trace_seq_used(s) - s->seq.readpos;
1491 	if (cnt > len)
1492 		cnt = len;
1493 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1494 
1495 	s->seq.readpos += cnt;
1496 	return cnt;
1497 }
1498 
1499 unsigned long __read_mostly	tracing_thresh;
1500 
1501 #ifdef CONFIG_TRACER_MAX_TRACE
1502 /*
1503  * Copy the new maximum trace into the separate maximum-trace
1504  * structure. (this way the maximum trace is permanently saved,
1505  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1506  */
1507 static void
__update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu)1508 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1509 {
1510 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1511 	struct trace_buffer *max_buf = &tr->max_buffer;
1512 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1513 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1514 
1515 	max_buf->cpu = cpu;
1516 	max_buf->time_start = data->preempt_timestamp;
1517 
1518 	max_data->saved_latency = tr->max_latency;
1519 	max_data->critical_start = data->critical_start;
1520 	max_data->critical_end = data->critical_end;
1521 
1522 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1523 	max_data->pid = tsk->pid;
1524 	/*
1525 	 * If tsk == current, then use current_uid(), as that does not use
1526 	 * RCU. The irq tracer can be called out of RCU scope.
1527 	 */
1528 	if (tsk == current)
1529 		max_data->uid = current_uid();
1530 	else
1531 		max_data->uid = task_uid(tsk);
1532 
1533 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1534 	max_data->policy = tsk->policy;
1535 	max_data->rt_priority = tsk->rt_priority;
1536 
1537 	/* record this tasks comm */
1538 	tracing_record_cmdline(tsk);
1539 }
1540 
1541 /**
1542  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1543  * @tr: tracer
1544  * @tsk: the task with the latency
1545  * @cpu: The cpu that initiated the trace.
1546  * @cond_data: User data associated with a conditional snapshot
1547  *
1548  * Flip the buffers between the @tr and the max_tr and record information
1549  * about which task was the cause of this latency.
1550  */
1551 void
update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu,void * cond_data)1552 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1553 	      void *cond_data)
1554 {
1555 	if (tr->stop_count)
1556 		return;
1557 
1558 	WARN_ON_ONCE(!irqs_disabled());
1559 
1560 	if (!tr->allocated_snapshot) {
1561 		/* Only the nop tracer should hit this when disabling */
1562 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1563 		return;
1564 	}
1565 
1566 	arch_spin_lock(&tr->max_lock);
1567 
1568 	/* Inherit the recordable setting from trace_buffer */
1569 	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1570 		ring_buffer_record_on(tr->max_buffer.buffer);
1571 	else
1572 		ring_buffer_record_off(tr->max_buffer.buffer);
1573 
1574 #ifdef CONFIG_TRACER_SNAPSHOT
1575 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1576 		goto out_unlock;
1577 #endif
1578 	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1579 
1580 	__update_max_tr(tr, tsk, cpu);
1581 
1582  out_unlock:
1583 	arch_spin_unlock(&tr->max_lock);
1584 }
1585 
1586 /**
1587  * update_max_tr_single - only copy one trace over, and reset the rest
1588  * @tr: tracer
1589  * @tsk: task with the latency
1590  * @cpu: the cpu of the buffer to copy.
1591  *
1592  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1593  */
1594 void
update_max_tr_single(struct trace_array * tr,struct task_struct * tsk,int cpu)1595 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1596 {
1597 	int ret;
1598 
1599 	if (tr->stop_count)
1600 		return;
1601 
1602 	WARN_ON_ONCE(!irqs_disabled());
1603 	if (!tr->allocated_snapshot) {
1604 		/* Only the nop tracer should hit this when disabling */
1605 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1606 		return;
1607 	}
1608 
1609 	arch_spin_lock(&tr->max_lock);
1610 
1611 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1612 
1613 	if (ret == -EBUSY) {
1614 		/*
1615 		 * We failed to swap the buffer due to a commit taking
1616 		 * place on this CPU. We fail to record, but we reset
1617 		 * the max trace buffer (no one writes directly to it)
1618 		 * and flag that it failed.
1619 		 */
1620 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1621 			"Failed to swap buffers due to commit in progress\n");
1622 	}
1623 
1624 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1625 
1626 	__update_max_tr(tr, tsk, cpu);
1627 	arch_spin_unlock(&tr->max_lock);
1628 }
1629 #endif /* CONFIG_TRACER_MAX_TRACE */
1630 
wait_on_pipe(struct trace_iterator * iter,int full)1631 static int wait_on_pipe(struct trace_iterator *iter, int full)
1632 {
1633 	/* Iterators are static, they should be filled or empty */
1634 	if (trace_buffer_iter(iter, iter->cpu_file))
1635 		return 0;
1636 
1637 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1638 				full);
1639 }
1640 
1641 #ifdef CONFIG_FTRACE_STARTUP_TEST
1642 static bool selftests_can_run;
1643 
1644 struct trace_selftests {
1645 	struct list_head		list;
1646 	struct tracer			*type;
1647 };
1648 
1649 static LIST_HEAD(postponed_selftests);
1650 
save_selftest(struct tracer * type)1651 static int save_selftest(struct tracer *type)
1652 {
1653 	struct trace_selftests *selftest;
1654 
1655 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1656 	if (!selftest)
1657 		return -ENOMEM;
1658 
1659 	selftest->type = type;
1660 	list_add(&selftest->list, &postponed_selftests);
1661 	return 0;
1662 }
1663 
run_tracer_selftest(struct tracer * type)1664 static int run_tracer_selftest(struct tracer *type)
1665 {
1666 	struct trace_array *tr = &global_trace;
1667 	struct tracer *saved_tracer = tr->current_trace;
1668 	int ret;
1669 
1670 	if (!type->selftest || tracing_selftest_disabled)
1671 		return 0;
1672 
1673 	/*
1674 	 * If a tracer registers early in boot up (before scheduling is
1675 	 * initialized and such), then do not run its selftests yet.
1676 	 * Instead, run it a little later in the boot process.
1677 	 */
1678 	if (!selftests_can_run)
1679 		return save_selftest(type);
1680 
1681 	/*
1682 	 * Run a selftest on this tracer.
1683 	 * Here we reset the trace buffer, and set the current
1684 	 * tracer to be this tracer. The tracer can then run some
1685 	 * internal tracing to verify that everything is in order.
1686 	 * If we fail, we do not register this tracer.
1687 	 */
1688 	tracing_reset_online_cpus(&tr->trace_buffer);
1689 
1690 	tr->current_trace = type;
1691 
1692 #ifdef CONFIG_TRACER_MAX_TRACE
1693 	if (type->use_max_tr) {
1694 		/* If we expanded the buffers, make sure the max is expanded too */
1695 		if (ring_buffer_expanded)
1696 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1697 					   RING_BUFFER_ALL_CPUS);
1698 		tr->allocated_snapshot = true;
1699 	}
1700 #endif
1701 
1702 	/* the test is responsible for initializing and enabling */
1703 	pr_info("Testing tracer %s: ", type->name);
1704 	ret = type->selftest(type, tr);
1705 	/* the test is responsible for resetting too */
1706 	tr->current_trace = saved_tracer;
1707 	if (ret) {
1708 		printk(KERN_CONT "FAILED!\n");
1709 		/* Add the warning after printing 'FAILED' */
1710 		WARN_ON(1);
1711 		return -1;
1712 	}
1713 	/* Only reset on passing, to avoid touching corrupted buffers */
1714 	tracing_reset_online_cpus(&tr->trace_buffer);
1715 
1716 #ifdef CONFIG_TRACER_MAX_TRACE
1717 	if (type->use_max_tr) {
1718 		tr->allocated_snapshot = false;
1719 
1720 		/* Shrink the max buffer again */
1721 		if (ring_buffer_expanded)
1722 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1723 					   RING_BUFFER_ALL_CPUS);
1724 	}
1725 #endif
1726 
1727 	printk(KERN_CONT "PASSED\n");
1728 	return 0;
1729 }
1730 
init_trace_selftests(void)1731 static __init int init_trace_selftests(void)
1732 {
1733 	struct trace_selftests *p, *n;
1734 	struct tracer *t, **last;
1735 	int ret;
1736 
1737 	selftests_can_run = true;
1738 
1739 	mutex_lock(&trace_types_lock);
1740 
1741 	if (list_empty(&postponed_selftests))
1742 		goto out;
1743 
1744 	pr_info("Running postponed tracer tests:\n");
1745 
1746 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1747 		/* This loop can take minutes when sanitizers are enabled, so
1748 		 * lets make sure we allow RCU processing.
1749 		 */
1750 		cond_resched();
1751 		ret = run_tracer_selftest(p->type);
1752 		/* If the test fails, then warn and remove from available_tracers */
1753 		if (ret < 0) {
1754 			WARN(1, "tracer: %s failed selftest, disabling\n",
1755 			     p->type->name);
1756 			last = &trace_types;
1757 			for (t = trace_types; t; t = t->next) {
1758 				if (t == p->type) {
1759 					*last = t->next;
1760 					break;
1761 				}
1762 				last = &t->next;
1763 			}
1764 		}
1765 		list_del(&p->list);
1766 		kfree(p);
1767 	}
1768 
1769  out:
1770 	mutex_unlock(&trace_types_lock);
1771 
1772 	return 0;
1773 }
1774 core_initcall(init_trace_selftests);
1775 #else
run_tracer_selftest(struct tracer * type)1776 static inline int run_tracer_selftest(struct tracer *type)
1777 {
1778 	return 0;
1779 }
1780 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1781 
1782 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1783 
1784 static void __init apply_trace_boot_options(void);
1785 
1786 /**
1787  * register_tracer - register a tracer with the ftrace system.
1788  * @type: the plugin for the tracer
1789  *
1790  * Register a new plugin tracer.
1791  */
register_tracer(struct tracer * type)1792 int __init register_tracer(struct tracer *type)
1793 {
1794 	struct tracer *t;
1795 	int ret = 0;
1796 
1797 	if (!type->name) {
1798 		pr_info("Tracer must have a name\n");
1799 		return -1;
1800 	}
1801 
1802 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1803 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1804 		return -1;
1805 	}
1806 
1807 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
1808 		pr_warning("Can not register tracer %s due to lockdown\n",
1809 			   type->name);
1810 		return -EPERM;
1811 	}
1812 
1813 	mutex_lock(&trace_types_lock);
1814 
1815 	tracing_selftest_running = true;
1816 
1817 	for (t = trace_types; t; t = t->next) {
1818 		if (strcmp(type->name, t->name) == 0) {
1819 			/* already found */
1820 			pr_info("Tracer %s already registered\n",
1821 				type->name);
1822 			ret = -1;
1823 			goto out;
1824 		}
1825 	}
1826 
1827 	if (!type->set_flag)
1828 		type->set_flag = &dummy_set_flag;
1829 	if (!type->flags) {
1830 		/*allocate a dummy tracer_flags*/
1831 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1832 		if (!type->flags) {
1833 			ret = -ENOMEM;
1834 			goto out;
1835 		}
1836 		type->flags->val = 0;
1837 		type->flags->opts = dummy_tracer_opt;
1838 	} else
1839 		if (!type->flags->opts)
1840 			type->flags->opts = dummy_tracer_opt;
1841 
1842 	/* store the tracer for __set_tracer_option */
1843 	type->flags->trace = type;
1844 
1845 	ret = run_tracer_selftest(type);
1846 	if (ret < 0)
1847 		goto out;
1848 
1849 	type->next = trace_types;
1850 	trace_types = type;
1851 	add_tracer_options(&global_trace, type);
1852 
1853  out:
1854 	tracing_selftest_running = false;
1855 	mutex_unlock(&trace_types_lock);
1856 
1857 	if (ret || !default_bootup_tracer)
1858 		goto out_unlock;
1859 
1860 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1861 		goto out_unlock;
1862 
1863 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1864 	/* Do we want this tracer to start on bootup? */
1865 	tracing_set_tracer(&global_trace, type->name);
1866 	default_bootup_tracer = NULL;
1867 
1868 	apply_trace_boot_options();
1869 
1870 	/* disable other selftests, since this will break it. */
1871 	tracing_selftest_disabled = true;
1872 #ifdef CONFIG_FTRACE_STARTUP_TEST
1873 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1874 	       type->name);
1875 #endif
1876 
1877  out_unlock:
1878 	return ret;
1879 }
1880 
tracing_reset_cpu(struct trace_buffer * buf,int cpu)1881 static void tracing_reset_cpu(struct trace_buffer *buf, int cpu)
1882 {
1883 	struct ring_buffer *buffer = buf->buffer;
1884 
1885 	if (!buffer)
1886 		return;
1887 
1888 	ring_buffer_record_disable(buffer);
1889 
1890 	/* Make sure all commits have finished */
1891 	synchronize_rcu();
1892 	ring_buffer_reset_cpu(buffer, cpu);
1893 
1894 	ring_buffer_record_enable(buffer);
1895 }
1896 
tracing_reset_online_cpus(struct trace_buffer * buf)1897 void tracing_reset_online_cpus(struct trace_buffer *buf)
1898 {
1899 	struct ring_buffer *buffer = buf->buffer;
1900 	int cpu;
1901 
1902 	if (!buffer)
1903 		return;
1904 
1905 	ring_buffer_record_disable(buffer);
1906 
1907 	/* Make sure all commits have finished */
1908 	synchronize_rcu();
1909 
1910 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1911 
1912 	for_each_online_cpu(cpu)
1913 		ring_buffer_reset_cpu(buffer, cpu);
1914 
1915 	ring_buffer_record_enable(buffer);
1916 }
1917 
1918 /* Must have trace_types_lock held */
tracing_reset_all_online_cpus(void)1919 void tracing_reset_all_online_cpus(void)
1920 {
1921 	struct trace_array *tr;
1922 
1923 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1924 		if (!tr->clear_trace)
1925 			continue;
1926 		tr->clear_trace = false;
1927 		tracing_reset_online_cpus(&tr->trace_buffer);
1928 #ifdef CONFIG_TRACER_MAX_TRACE
1929 		tracing_reset_online_cpus(&tr->max_buffer);
1930 #endif
1931 	}
1932 }
1933 
1934 static int *tgid_map;
1935 
1936 #define SAVED_CMDLINES_DEFAULT 128
1937 #define NO_CMDLINE_MAP UINT_MAX
1938 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1939 struct saved_cmdlines_buffer {
1940 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1941 	unsigned *map_cmdline_to_pid;
1942 	unsigned cmdline_num;
1943 	int cmdline_idx;
1944 	char *saved_cmdlines;
1945 };
1946 static struct saved_cmdlines_buffer *savedcmd;
1947 
1948 /* temporary disable recording */
1949 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1950 
get_saved_cmdlines(int idx)1951 static inline char *get_saved_cmdlines(int idx)
1952 {
1953 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1954 }
1955 
set_cmdline(int idx,const char * cmdline)1956 static inline void set_cmdline(int idx, const char *cmdline)
1957 {
1958 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1959 }
1960 
allocate_cmdlines_buffer(unsigned int val,struct saved_cmdlines_buffer * s)1961 static int allocate_cmdlines_buffer(unsigned int val,
1962 				    struct saved_cmdlines_buffer *s)
1963 {
1964 	s->map_cmdline_to_pid = kmalloc_array(val,
1965 					      sizeof(*s->map_cmdline_to_pid),
1966 					      GFP_KERNEL);
1967 	if (!s->map_cmdline_to_pid)
1968 		return -ENOMEM;
1969 
1970 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1971 	if (!s->saved_cmdlines) {
1972 		kfree(s->map_cmdline_to_pid);
1973 		return -ENOMEM;
1974 	}
1975 
1976 	s->cmdline_idx = 0;
1977 	s->cmdline_num = val;
1978 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1979 	       sizeof(s->map_pid_to_cmdline));
1980 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1981 	       val * sizeof(*s->map_cmdline_to_pid));
1982 
1983 	return 0;
1984 }
1985 
trace_create_savedcmd(void)1986 static int trace_create_savedcmd(void)
1987 {
1988 	int ret;
1989 
1990 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1991 	if (!savedcmd)
1992 		return -ENOMEM;
1993 
1994 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1995 	if (ret < 0) {
1996 		kfree(savedcmd);
1997 		savedcmd = NULL;
1998 		return -ENOMEM;
1999 	}
2000 
2001 	return 0;
2002 }
2003 
is_tracing_stopped(void)2004 int is_tracing_stopped(void)
2005 {
2006 	return global_trace.stop_count;
2007 }
2008 
2009 /**
2010  * tracing_start - quick start of the tracer
2011  *
2012  * If tracing is enabled but was stopped by tracing_stop,
2013  * this will start the tracer back up.
2014  */
tracing_start(void)2015 void tracing_start(void)
2016 {
2017 	struct ring_buffer *buffer;
2018 	unsigned long flags;
2019 
2020 	if (tracing_disabled)
2021 		return;
2022 
2023 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2024 	if (--global_trace.stop_count) {
2025 		if (global_trace.stop_count < 0) {
2026 			/* Someone screwed up their debugging */
2027 			WARN_ON_ONCE(1);
2028 			global_trace.stop_count = 0;
2029 		}
2030 		goto out;
2031 	}
2032 
2033 	/* Prevent the buffers from switching */
2034 	arch_spin_lock(&global_trace.max_lock);
2035 
2036 	buffer = global_trace.trace_buffer.buffer;
2037 	if (buffer)
2038 		ring_buffer_record_enable(buffer);
2039 
2040 #ifdef CONFIG_TRACER_MAX_TRACE
2041 	buffer = global_trace.max_buffer.buffer;
2042 	if (buffer)
2043 		ring_buffer_record_enable(buffer);
2044 #endif
2045 
2046 	arch_spin_unlock(&global_trace.max_lock);
2047 
2048  out:
2049 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2050 }
2051 
tracing_start_tr(struct trace_array * tr)2052 static void tracing_start_tr(struct trace_array *tr)
2053 {
2054 	struct ring_buffer *buffer;
2055 	unsigned long flags;
2056 
2057 	if (tracing_disabled)
2058 		return;
2059 
2060 	/* If global, we need to also start the max tracer */
2061 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2062 		return tracing_start();
2063 
2064 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2065 
2066 	if (--tr->stop_count) {
2067 		if (tr->stop_count < 0) {
2068 			/* Someone screwed up their debugging */
2069 			WARN_ON_ONCE(1);
2070 			tr->stop_count = 0;
2071 		}
2072 		goto out;
2073 	}
2074 
2075 	buffer = tr->trace_buffer.buffer;
2076 	if (buffer)
2077 		ring_buffer_record_enable(buffer);
2078 
2079  out:
2080 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2081 }
2082 
2083 /**
2084  * tracing_stop - quick stop of the tracer
2085  *
2086  * Light weight way to stop tracing. Use in conjunction with
2087  * tracing_start.
2088  */
tracing_stop(void)2089 void tracing_stop(void)
2090 {
2091 	struct ring_buffer *buffer;
2092 	unsigned long flags;
2093 
2094 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2095 	if (global_trace.stop_count++)
2096 		goto out;
2097 
2098 	/* Prevent the buffers from switching */
2099 	arch_spin_lock(&global_trace.max_lock);
2100 
2101 	buffer = global_trace.trace_buffer.buffer;
2102 	if (buffer)
2103 		ring_buffer_record_disable(buffer);
2104 
2105 #ifdef CONFIG_TRACER_MAX_TRACE
2106 	buffer = global_trace.max_buffer.buffer;
2107 	if (buffer)
2108 		ring_buffer_record_disable(buffer);
2109 #endif
2110 
2111 	arch_spin_unlock(&global_trace.max_lock);
2112 
2113  out:
2114 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2115 }
2116 
tracing_stop_tr(struct trace_array * tr)2117 static void tracing_stop_tr(struct trace_array *tr)
2118 {
2119 	struct ring_buffer *buffer;
2120 	unsigned long flags;
2121 
2122 	/* If global, we need to also stop the max tracer */
2123 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2124 		return tracing_stop();
2125 
2126 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2127 	if (tr->stop_count++)
2128 		goto out;
2129 
2130 	buffer = tr->trace_buffer.buffer;
2131 	if (buffer)
2132 		ring_buffer_record_disable(buffer);
2133 
2134  out:
2135 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2136 }
2137 
trace_save_cmdline(struct task_struct * tsk)2138 static int trace_save_cmdline(struct task_struct *tsk)
2139 {
2140 	unsigned pid, idx;
2141 
2142 	/* treat recording of idle task as a success */
2143 	if (!tsk->pid)
2144 		return 1;
2145 
2146 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2147 		return 0;
2148 
2149 	/*
2150 	 * It's not the end of the world if we don't get
2151 	 * the lock, but we also don't want to spin
2152 	 * nor do we want to disable interrupts,
2153 	 * so if we miss here, then better luck next time.
2154 	 */
2155 	if (!arch_spin_trylock(&trace_cmdline_lock))
2156 		return 0;
2157 
2158 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2159 	if (idx == NO_CMDLINE_MAP) {
2160 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2161 
2162 		/*
2163 		 * Check whether the cmdline buffer at idx has a pid
2164 		 * mapped. We are going to overwrite that entry so we
2165 		 * need to clear the map_pid_to_cmdline. Otherwise we
2166 		 * would read the new comm for the old pid.
2167 		 */
2168 		pid = savedcmd->map_cmdline_to_pid[idx];
2169 		if (pid != NO_CMDLINE_MAP)
2170 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2171 
2172 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2173 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2174 
2175 		savedcmd->cmdline_idx = idx;
2176 	}
2177 
2178 	set_cmdline(idx, tsk->comm);
2179 
2180 	arch_spin_unlock(&trace_cmdline_lock);
2181 
2182 	return 1;
2183 }
2184 
__trace_find_cmdline(int pid,char comm[])2185 static void __trace_find_cmdline(int pid, char comm[])
2186 {
2187 	unsigned map;
2188 
2189 	if (!pid) {
2190 		strcpy(comm, "<idle>");
2191 		return;
2192 	}
2193 
2194 	if (WARN_ON_ONCE(pid < 0)) {
2195 		strcpy(comm, "<XXX>");
2196 		return;
2197 	}
2198 
2199 	if (pid > PID_MAX_DEFAULT) {
2200 		strcpy(comm, "<...>");
2201 		return;
2202 	}
2203 
2204 	map = savedcmd->map_pid_to_cmdline[pid];
2205 	if (map != NO_CMDLINE_MAP)
2206 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2207 	else
2208 		strcpy(comm, "<...>");
2209 }
2210 
trace_find_cmdline(int pid,char comm[])2211 void trace_find_cmdline(int pid, char comm[])
2212 {
2213 	preempt_disable();
2214 	arch_spin_lock(&trace_cmdline_lock);
2215 
2216 	__trace_find_cmdline(pid, comm);
2217 
2218 	arch_spin_unlock(&trace_cmdline_lock);
2219 	preempt_enable();
2220 }
2221 
trace_find_tgid(int pid)2222 int trace_find_tgid(int pid)
2223 {
2224 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2225 		return 0;
2226 
2227 	return tgid_map[pid];
2228 }
2229 
trace_save_tgid(struct task_struct * tsk)2230 static int trace_save_tgid(struct task_struct *tsk)
2231 {
2232 	/* treat recording of idle task as a success */
2233 	if (!tsk->pid)
2234 		return 1;
2235 
2236 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2237 		return 0;
2238 
2239 	tgid_map[tsk->pid] = tsk->tgid;
2240 	return 1;
2241 }
2242 
tracing_record_taskinfo_skip(int flags)2243 static bool tracing_record_taskinfo_skip(int flags)
2244 {
2245 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2246 		return true;
2247 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2248 		return true;
2249 	if (!__this_cpu_read(trace_taskinfo_save))
2250 		return true;
2251 	return false;
2252 }
2253 
2254 /**
2255  * tracing_record_taskinfo - record the task info of a task
2256  *
2257  * @task:  task to record
2258  * @flags: TRACE_RECORD_CMDLINE for recording comm
2259  *         TRACE_RECORD_TGID for recording tgid
2260  */
tracing_record_taskinfo(struct task_struct * task,int flags)2261 void tracing_record_taskinfo(struct task_struct *task, int flags)
2262 {
2263 	bool done;
2264 
2265 	if (tracing_record_taskinfo_skip(flags))
2266 		return;
2267 
2268 	/*
2269 	 * Record as much task information as possible. If some fail, continue
2270 	 * to try to record the others.
2271 	 */
2272 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2273 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2274 
2275 	/* If recording any information failed, retry again soon. */
2276 	if (!done)
2277 		return;
2278 
2279 	__this_cpu_write(trace_taskinfo_save, false);
2280 }
2281 
2282 /**
2283  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2284  *
2285  * @prev: previous task during sched_switch
2286  * @next: next task during sched_switch
2287  * @flags: TRACE_RECORD_CMDLINE for recording comm
2288  *         TRACE_RECORD_TGID for recording tgid
2289  */
tracing_record_taskinfo_sched_switch(struct task_struct * prev,struct task_struct * next,int flags)2290 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2291 					  struct task_struct *next, int flags)
2292 {
2293 	bool done;
2294 
2295 	if (tracing_record_taskinfo_skip(flags))
2296 		return;
2297 
2298 	/*
2299 	 * Record as much task information as possible. If some fail, continue
2300 	 * to try to record the others.
2301 	 */
2302 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2303 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2304 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2305 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2306 
2307 	/* If recording any information failed, retry again soon. */
2308 	if (!done)
2309 		return;
2310 
2311 	__this_cpu_write(trace_taskinfo_save, false);
2312 }
2313 
2314 /* Helpers to record a specific task information */
tracing_record_cmdline(struct task_struct * task)2315 void tracing_record_cmdline(struct task_struct *task)
2316 {
2317 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2318 }
2319 
tracing_record_tgid(struct task_struct * task)2320 void tracing_record_tgid(struct task_struct *task)
2321 {
2322 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2323 }
2324 
2325 /*
2326  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2327  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2328  * simplifies those functions and keeps them in sync.
2329  */
trace_handle_return(struct trace_seq * s)2330 enum print_line_t trace_handle_return(struct trace_seq *s)
2331 {
2332 	return trace_seq_has_overflowed(s) ?
2333 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2334 }
2335 EXPORT_SYMBOL_GPL(trace_handle_return);
2336 
2337 void
tracing_generic_entry_update(struct trace_entry * entry,unsigned short type,unsigned long flags,int pc)2338 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2339 			     unsigned long flags, int pc)
2340 {
2341 	struct task_struct *tsk = current;
2342 
2343 	entry->preempt_count		= pc & 0xff;
2344 	entry->pid			= (tsk) ? tsk->pid : 0;
2345 	entry->type			= type;
2346 	entry->flags =
2347 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2348 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2349 #else
2350 		TRACE_FLAG_IRQS_NOSUPPORT |
2351 #endif
2352 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2353 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2354 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2355 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2356 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2357 }
2358 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2359 
2360 struct ring_buffer_event *
trace_buffer_lock_reserve(struct ring_buffer * buffer,int type,unsigned long len,unsigned long flags,int pc)2361 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2362 			  int type,
2363 			  unsigned long len,
2364 			  unsigned long flags, int pc)
2365 {
2366 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2367 }
2368 
2369 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2370 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2371 static int trace_buffered_event_ref;
2372 
2373 /**
2374  * trace_buffered_event_enable - enable buffering events
2375  *
2376  * When events are being filtered, it is quicker to use a temporary
2377  * buffer to write the event data into if there's a likely chance
2378  * that it will not be committed. The discard of the ring buffer
2379  * is not as fast as committing, and is much slower than copying
2380  * a commit.
2381  *
2382  * When an event is to be filtered, allocate per cpu buffers to
2383  * write the event data into, and if the event is filtered and discarded
2384  * it is simply dropped, otherwise, the entire data is to be committed
2385  * in one shot.
2386  */
trace_buffered_event_enable(void)2387 void trace_buffered_event_enable(void)
2388 {
2389 	struct ring_buffer_event *event;
2390 	struct page *page;
2391 	int cpu;
2392 
2393 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2394 
2395 	if (trace_buffered_event_ref++)
2396 		return;
2397 
2398 	for_each_tracing_cpu(cpu) {
2399 		page = alloc_pages_node(cpu_to_node(cpu),
2400 					GFP_KERNEL | __GFP_NORETRY, 0);
2401 		if (!page)
2402 			goto failed;
2403 
2404 		event = page_address(page);
2405 		memset(event, 0, sizeof(*event));
2406 
2407 		per_cpu(trace_buffered_event, cpu) = event;
2408 
2409 		preempt_disable();
2410 		if (cpu == smp_processor_id() &&
2411 		    this_cpu_read(trace_buffered_event) !=
2412 		    per_cpu(trace_buffered_event, cpu))
2413 			WARN_ON_ONCE(1);
2414 		preempt_enable();
2415 	}
2416 
2417 	return;
2418  failed:
2419 	trace_buffered_event_disable();
2420 }
2421 
enable_trace_buffered_event(void * data)2422 static void enable_trace_buffered_event(void *data)
2423 {
2424 	/* Probably not needed, but do it anyway */
2425 	smp_rmb();
2426 	this_cpu_dec(trace_buffered_event_cnt);
2427 }
2428 
disable_trace_buffered_event(void * data)2429 static void disable_trace_buffered_event(void *data)
2430 {
2431 	this_cpu_inc(trace_buffered_event_cnt);
2432 }
2433 
2434 /**
2435  * trace_buffered_event_disable - disable buffering events
2436  *
2437  * When a filter is removed, it is faster to not use the buffered
2438  * events, and to commit directly into the ring buffer. Free up
2439  * the temp buffers when there are no more users. This requires
2440  * special synchronization with current events.
2441  */
trace_buffered_event_disable(void)2442 void trace_buffered_event_disable(void)
2443 {
2444 	int cpu;
2445 
2446 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2447 
2448 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2449 		return;
2450 
2451 	if (--trace_buffered_event_ref)
2452 		return;
2453 
2454 	preempt_disable();
2455 	/* For each CPU, set the buffer as used. */
2456 	smp_call_function_many(tracing_buffer_mask,
2457 			       disable_trace_buffered_event, NULL, 1);
2458 	preempt_enable();
2459 
2460 	/* Wait for all current users to finish */
2461 	synchronize_rcu();
2462 
2463 	for_each_tracing_cpu(cpu) {
2464 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2465 		per_cpu(trace_buffered_event, cpu) = NULL;
2466 	}
2467 	/*
2468 	 * Make sure trace_buffered_event is NULL before clearing
2469 	 * trace_buffered_event_cnt.
2470 	 */
2471 	smp_wmb();
2472 
2473 	preempt_disable();
2474 	/* Do the work on each cpu */
2475 	smp_call_function_many(tracing_buffer_mask,
2476 			       enable_trace_buffered_event, NULL, 1);
2477 	preempt_enable();
2478 }
2479 
2480 static struct ring_buffer *temp_buffer;
2481 
2482 struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct ring_buffer ** current_rb,struct trace_event_file * trace_file,int type,unsigned long len,unsigned long flags,int pc)2483 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2484 			  struct trace_event_file *trace_file,
2485 			  int type, unsigned long len,
2486 			  unsigned long flags, int pc)
2487 {
2488 	struct ring_buffer_event *entry;
2489 	int val;
2490 
2491 	*current_rb = trace_file->tr->trace_buffer.buffer;
2492 
2493 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2494 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2495 	    (entry = this_cpu_read(trace_buffered_event))) {
2496 		/* Try to use the per cpu buffer first */
2497 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2498 		if (val == 1) {
2499 			trace_event_setup(entry, type, flags, pc);
2500 			entry->array[0] = len;
2501 			return entry;
2502 		}
2503 		this_cpu_dec(trace_buffered_event_cnt);
2504 	}
2505 
2506 	entry = __trace_buffer_lock_reserve(*current_rb,
2507 					    type, len, flags, pc);
2508 	/*
2509 	 * If tracing is off, but we have triggers enabled
2510 	 * we still need to look at the event data. Use the temp_buffer
2511 	 * to store the trace event for the tigger to use. It's recusive
2512 	 * safe and will not be recorded anywhere.
2513 	 */
2514 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2515 		*current_rb = temp_buffer;
2516 		entry = __trace_buffer_lock_reserve(*current_rb,
2517 						    type, len, flags, pc);
2518 	}
2519 	return entry;
2520 }
2521 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2522 
2523 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2524 static DEFINE_MUTEX(tracepoint_printk_mutex);
2525 
output_printk(struct trace_event_buffer * fbuffer)2526 static void output_printk(struct trace_event_buffer *fbuffer)
2527 {
2528 	struct trace_event_call *event_call;
2529 	struct trace_event *event;
2530 	unsigned long flags;
2531 	struct trace_iterator *iter = tracepoint_print_iter;
2532 
2533 	/* We should never get here if iter is NULL */
2534 	if (WARN_ON_ONCE(!iter))
2535 		return;
2536 
2537 	event_call = fbuffer->trace_file->event_call;
2538 	if (!event_call || !event_call->event.funcs ||
2539 	    !event_call->event.funcs->trace)
2540 		return;
2541 
2542 	event = &fbuffer->trace_file->event_call->event;
2543 
2544 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2545 	trace_seq_init(&iter->seq);
2546 	iter->ent = fbuffer->entry;
2547 	event_call->event.funcs->trace(iter, 0, event);
2548 	trace_seq_putc(&iter->seq, 0);
2549 	printk("%s", iter->seq.buffer);
2550 
2551 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2552 }
2553 
tracepoint_printk_sysctl(struct ctl_table * table,int write,void __user * buffer,size_t * lenp,loff_t * ppos)2554 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2555 			     void __user *buffer, size_t *lenp,
2556 			     loff_t *ppos)
2557 {
2558 	int save_tracepoint_printk;
2559 	int ret;
2560 
2561 	mutex_lock(&tracepoint_printk_mutex);
2562 	save_tracepoint_printk = tracepoint_printk;
2563 
2564 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2565 
2566 	/*
2567 	 * This will force exiting early, as tracepoint_printk
2568 	 * is always zero when tracepoint_printk_iter is not allocated
2569 	 */
2570 	if (!tracepoint_print_iter)
2571 		tracepoint_printk = 0;
2572 
2573 	if (save_tracepoint_printk == tracepoint_printk)
2574 		goto out;
2575 
2576 	if (tracepoint_printk)
2577 		static_key_enable(&tracepoint_printk_key.key);
2578 	else
2579 		static_key_disable(&tracepoint_printk_key.key);
2580 
2581  out:
2582 	mutex_unlock(&tracepoint_printk_mutex);
2583 
2584 	return ret;
2585 }
2586 
trace_event_buffer_commit(struct trace_event_buffer * fbuffer)2587 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2588 {
2589 	if (static_key_false(&tracepoint_printk_key.key))
2590 		output_printk(fbuffer);
2591 
2592 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2593 				    fbuffer->event, fbuffer->entry,
2594 				    fbuffer->flags, fbuffer->pc);
2595 }
2596 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2597 
2598 /*
2599  * Skip 3:
2600  *
2601  *   trace_buffer_unlock_commit_regs()
2602  *   trace_event_buffer_commit()
2603  *   trace_event_raw_event_xxx()
2604  */
2605 # define STACK_SKIP 3
2606 
trace_buffer_unlock_commit_regs(struct trace_array * tr,struct ring_buffer * buffer,struct ring_buffer_event * event,unsigned long flags,int pc,struct pt_regs * regs)2607 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2608 				     struct ring_buffer *buffer,
2609 				     struct ring_buffer_event *event,
2610 				     unsigned long flags, int pc,
2611 				     struct pt_regs *regs)
2612 {
2613 	__buffer_unlock_commit(buffer, event);
2614 
2615 	/*
2616 	 * If regs is not set, then skip the necessary functions.
2617 	 * Note, we can still get here via blktrace, wakeup tracer
2618 	 * and mmiotrace, but that's ok if they lose a function or
2619 	 * two. They are not that meaningful.
2620 	 */
2621 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2622 	ftrace_trace_userstack(buffer, flags, pc);
2623 }
2624 
2625 /*
2626  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2627  */
2628 void
trace_buffer_unlock_commit_nostack(struct ring_buffer * buffer,struct ring_buffer_event * event)2629 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2630 				   struct ring_buffer_event *event)
2631 {
2632 	__buffer_unlock_commit(buffer, event);
2633 }
2634 
2635 static void
trace_process_export(struct trace_export * export,struct ring_buffer_event * event)2636 trace_process_export(struct trace_export *export,
2637 	       struct ring_buffer_event *event)
2638 {
2639 	struct trace_entry *entry;
2640 	unsigned int size = 0;
2641 
2642 	entry = ring_buffer_event_data(event);
2643 	size = ring_buffer_event_length(event);
2644 	export->write(export, entry, size);
2645 }
2646 
2647 static DEFINE_MUTEX(ftrace_export_lock);
2648 
2649 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2650 
2651 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2652 
ftrace_exports_enable(void)2653 static inline void ftrace_exports_enable(void)
2654 {
2655 	static_branch_enable(&ftrace_exports_enabled);
2656 }
2657 
ftrace_exports_disable(void)2658 static inline void ftrace_exports_disable(void)
2659 {
2660 	static_branch_disable(&ftrace_exports_enabled);
2661 }
2662 
ftrace_exports(struct ring_buffer_event * event)2663 static void ftrace_exports(struct ring_buffer_event *event)
2664 {
2665 	struct trace_export *export;
2666 
2667 	preempt_disable_notrace();
2668 
2669 	export = rcu_dereference_raw_check(ftrace_exports_list);
2670 	while (export) {
2671 		trace_process_export(export, event);
2672 		export = rcu_dereference_raw_check(export->next);
2673 	}
2674 
2675 	preempt_enable_notrace();
2676 }
2677 
2678 static inline void
add_trace_export(struct trace_export ** list,struct trace_export * export)2679 add_trace_export(struct trace_export **list, struct trace_export *export)
2680 {
2681 	rcu_assign_pointer(export->next, *list);
2682 	/*
2683 	 * We are entering export into the list but another
2684 	 * CPU might be walking that list. We need to make sure
2685 	 * the export->next pointer is valid before another CPU sees
2686 	 * the export pointer included into the list.
2687 	 */
2688 	rcu_assign_pointer(*list, export);
2689 }
2690 
2691 static inline int
rm_trace_export(struct trace_export ** list,struct trace_export * export)2692 rm_trace_export(struct trace_export **list, struct trace_export *export)
2693 {
2694 	struct trace_export **p;
2695 
2696 	for (p = list; *p != NULL; p = &(*p)->next)
2697 		if (*p == export)
2698 			break;
2699 
2700 	if (*p != export)
2701 		return -1;
2702 
2703 	rcu_assign_pointer(*p, (*p)->next);
2704 
2705 	return 0;
2706 }
2707 
2708 static inline void
add_ftrace_export(struct trace_export ** list,struct trace_export * export)2709 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2710 {
2711 	if (*list == NULL)
2712 		ftrace_exports_enable();
2713 
2714 	add_trace_export(list, export);
2715 }
2716 
2717 static inline int
rm_ftrace_export(struct trace_export ** list,struct trace_export * export)2718 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2719 {
2720 	int ret;
2721 
2722 	ret = rm_trace_export(list, export);
2723 	if (*list == NULL)
2724 		ftrace_exports_disable();
2725 
2726 	return ret;
2727 }
2728 
register_ftrace_export(struct trace_export * export)2729 int register_ftrace_export(struct trace_export *export)
2730 {
2731 	if (WARN_ON_ONCE(!export->write))
2732 		return -1;
2733 
2734 	mutex_lock(&ftrace_export_lock);
2735 
2736 	add_ftrace_export(&ftrace_exports_list, export);
2737 
2738 	mutex_unlock(&ftrace_export_lock);
2739 
2740 	return 0;
2741 }
2742 EXPORT_SYMBOL_GPL(register_ftrace_export);
2743 
unregister_ftrace_export(struct trace_export * export)2744 int unregister_ftrace_export(struct trace_export *export)
2745 {
2746 	int ret;
2747 
2748 	mutex_lock(&ftrace_export_lock);
2749 
2750 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2751 
2752 	mutex_unlock(&ftrace_export_lock);
2753 
2754 	return ret;
2755 }
2756 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2757 
2758 void
trace_function(struct trace_array * tr,unsigned long ip,unsigned long parent_ip,unsigned long flags,int pc)2759 trace_function(struct trace_array *tr,
2760 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2761 	       int pc)
2762 {
2763 	struct trace_event_call *call = &event_function;
2764 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2765 	struct ring_buffer_event *event;
2766 	struct ftrace_entry *entry;
2767 
2768 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2769 					    flags, pc);
2770 	if (!event)
2771 		return;
2772 	entry	= ring_buffer_event_data(event);
2773 	entry->ip			= ip;
2774 	entry->parent_ip		= parent_ip;
2775 
2776 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2777 		if (static_branch_unlikely(&ftrace_exports_enabled))
2778 			ftrace_exports(event);
2779 		__buffer_unlock_commit(buffer, event);
2780 	}
2781 }
2782 
2783 #ifdef CONFIG_STACKTRACE
2784 
2785 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2786 #define FTRACE_KSTACK_NESTING	4
2787 
2788 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2789 
2790 struct ftrace_stack {
2791 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2792 };
2793 
2794 
2795 struct ftrace_stacks {
2796 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2797 };
2798 
2799 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2800 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2801 
__ftrace_trace_stack(struct ring_buffer * buffer,unsigned long flags,int skip,int pc,struct pt_regs * regs)2802 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2803 				 unsigned long flags,
2804 				 int skip, int pc, struct pt_regs *regs)
2805 {
2806 	struct trace_event_call *call = &event_kernel_stack;
2807 	struct ring_buffer_event *event;
2808 	unsigned int size, nr_entries;
2809 	struct ftrace_stack *fstack;
2810 	struct stack_entry *entry;
2811 	int stackidx;
2812 
2813 	/*
2814 	 * Add one, for this function and the call to save_stack_trace()
2815 	 * If regs is set, then these functions will not be in the way.
2816 	 */
2817 #ifndef CONFIG_UNWINDER_ORC
2818 	if (!regs)
2819 		skip++;
2820 #endif
2821 
2822 	/*
2823 	 * Since events can happen in NMIs there's no safe way to
2824 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2825 	 * or NMI comes in, it will just have to use the default
2826 	 * FTRACE_STACK_SIZE.
2827 	 */
2828 	preempt_disable_notrace();
2829 
2830 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2831 
2832 	/* This should never happen. If it does, yell once and skip */
2833 	if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING))
2834 		goto out;
2835 
2836 	/*
2837 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2838 	 * interrupt will either see the value pre increment or post
2839 	 * increment. If the interrupt happens pre increment it will have
2840 	 * restored the counter when it returns.  We just need a barrier to
2841 	 * keep gcc from moving things around.
2842 	 */
2843 	barrier();
2844 
2845 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2846 	size = ARRAY_SIZE(fstack->calls);
2847 
2848 	if (regs) {
2849 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2850 						   size, skip);
2851 	} else {
2852 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2853 	}
2854 
2855 	size = nr_entries * sizeof(unsigned long);
2856 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2857 					    sizeof(*entry) + size, flags, pc);
2858 	if (!event)
2859 		goto out;
2860 	entry = ring_buffer_event_data(event);
2861 
2862 	memcpy(&entry->caller, fstack->calls, size);
2863 	entry->size = nr_entries;
2864 
2865 	if (!call_filter_check_discard(call, entry, buffer, event))
2866 		__buffer_unlock_commit(buffer, event);
2867 
2868  out:
2869 	/* Again, don't let gcc optimize things here */
2870 	barrier();
2871 	__this_cpu_dec(ftrace_stack_reserve);
2872 	preempt_enable_notrace();
2873 
2874 }
2875 
ftrace_trace_stack(struct trace_array * tr,struct ring_buffer * buffer,unsigned long flags,int skip,int pc,struct pt_regs * regs)2876 static inline void ftrace_trace_stack(struct trace_array *tr,
2877 				      struct ring_buffer *buffer,
2878 				      unsigned long flags,
2879 				      int skip, int pc, struct pt_regs *regs)
2880 {
2881 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2882 		return;
2883 
2884 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2885 }
2886 
__trace_stack(struct trace_array * tr,unsigned long flags,int skip,int pc)2887 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2888 		   int pc)
2889 {
2890 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2891 
2892 	if (rcu_is_watching()) {
2893 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2894 		return;
2895 	}
2896 
2897 	/*
2898 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2899 	 * but if the above rcu_is_watching() failed, then the NMI
2900 	 * triggered someplace critical, and rcu_irq_enter() should
2901 	 * not be called from NMI.
2902 	 */
2903 	if (unlikely(in_nmi()))
2904 		return;
2905 
2906 	rcu_irq_enter_irqson();
2907 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2908 	rcu_irq_exit_irqson();
2909 }
2910 
2911 /**
2912  * trace_dump_stack - record a stack back trace in the trace buffer
2913  * @skip: Number of functions to skip (helper handlers)
2914  */
trace_dump_stack(int skip)2915 void trace_dump_stack(int skip)
2916 {
2917 	unsigned long flags;
2918 
2919 	if (tracing_disabled || tracing_selftest_running)
2920 		return;
2921 
2922 	local_save_flags(flags);
2923 
2924 #ifndef CONFIG_UNWINDER_ORC
2925 	/* Skip 1 to skip this function. */
2926 	skip++;
2927 #endif
2928 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2929 			     flags, skip, preempt_count(), NULL);
2930 }
2931 EXPORT_SYMBOL_GPL(trace_dump_stack);
2932 
2933 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
2934 static DEFINE_PER_CPU(int, user_stack_count);
2935 
2936 static void
ftrace_trace_userstack(struct ring_buffer * buffer,unsigned long flags,int pc)2937 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2938 {
2939 	struct trace_event_call *call = &event_user_stack;
2940 	struct ring_buffer_event *event;
2941 	struct userstack_entry *entry;
2942 
2943 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2944 		return;
2945 
2946 	/*
2947 	 * NMIs can not handle page faults, even with fix ups.
2948 	 * The save user stack can (and often does) fault.
2949 	 */
2950 	if (unlikely(in_nmi()))
2951 		return;
2952 
2953 	/*
2954 	 * prevent recursion, since the user stack tracing may
2955 	 * trigger other kernel events.
2956 	 */
2957 	preempt_disable();
2958 	if (__this_cpu_read(user_stack_count))
2959 		goto out;
2960 
2961 	__this_cpu_inc(user_stack_count);
2962 
2963 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2964 					    sizeof(*entry), flags, pc);
2965 	if (!event)
2966 		goto out_drop_count;
2967 	entry	= ring_buffer_event_data(event);
2968 
2969 	entry->tgid		= current->tgid;
2970 	memset(&entry->caller, 0, sizeof(entry->caller));
2971 
2972 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
2973 	if (!call_filter_check_discard(call, entry, buffer, event))
2974 		__buffer_unlock_commit(buffer, event);
2975 
2976  out_drop_count:
2977 	__this_cpu_dec(user_stack_count);
2978  out:
2979 	preempt_enable();
2980 }
2981 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
ftrace_trace_userstack(struct ring_buffer * buffer,unsigned long flags,int pc)2982 static void ftrace_trace_userstack(struct ring_buffer *buffer,
2983 				   unsigned long flags, int pc)
2984 {
2985 }
2986 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
2987 
2988 #endif /* CONFIG_STACKTRACE */
2989 
2990 /* created for use with alloc_percpu */
2991 struct trace_buffer_struct {
2992 	int nesting;
2993 	char buffer[4][TRACE_BUF_SIZE];
2994 };
2995 
2996 static struct trace_buffer_struct *trace_percpu_buffer;
2997 
2998 /*
2999  * Thise allows for lockless recording.  If we're nested too deeply, then
3000  * this returns NULL.
3001  */
get_trace_buf(void)3002 static char *get_trace_buf(void)
3003 {
3004 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3005 
3006 	if (!buffer || buffer->nesting >= 4)
3007 		return NULL;
3008 
3009 	buffer->nesting++;
3010 
3011 	/* Interrupts must see nesting incremented before we use the buffer */
3012 	barrier();
3013 	return &buffer->buffer[buffer->nesting][0];
3014 }
3015 
put_trace_buf(void)3016 static void put_trace_buf(void)
3017 {
3018 	/* Don't let the decrement of nesting leak before this */
3019 	barrier();
3020 	this_cpu_dec(trace_percpu_buffer->nesting);
3021 }
3022 
alloc_percpu_trace_buffer(void)3023 static int alloc_percpu_trace_buffer(void)
3024 {
3025 	struct trace_buffer_struct *buffers;
3026 
3027 	buffers = alloc_percpu(struct trace_buffer_struct);
3028 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3029 		return -ENOMEM;
3030 
3031 	trace_percpu_buffer = buffers;
3032 	return 0;
3033 }
3034 
3035 static int buffers_allocated;
3036 
trace_printk_init_buffers(void)3037 void trace_printk_init_buffers(void)
3038 {
3039 	if (buffers_allocated)
3040 		return;
3041 
3042 	if (alloc_percpu_trace_buffer())
3043 		return;
3044 
3045 	/* trace_printk() is for debug use only. Don't use it in production. */
3046 
3047 	pr_warn("\n");
3048 	pr_warn("**********************************************************\n");
3049 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3050 	pr_warn("**                                                      **\n");
3051 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3052 	pr_warn("**                                                      **\n");
3053 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3054 	pr_warn("** unsafe for production use.                           **\n");
3055 	pr_warn("**                                                      **\n");
3056 	pr_warn("** If you see this message and you are not debugging    **\n");
3057 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3058 	pr_warn("**                                                      **\n");
3059 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3060 	pr_warn("**********************************************************\n");
3061 
3062 	/* Expand the buffers to set size */
3063 	tracing_update_buffers();
3064 
3065 	buffers_allocated = 1;
3066 
3067 	/*
3068 	 * trace_printk_init_buffers() can be called by modules.
3069 	 * If that happens, then we need to start cmdline recording
3070 	 * directly here. If the global_trace.buffer is already
3071 	 * allocated here, then this was called by module code.
3072 	 */
3073 	if (global_trace.trace_buffer.buffer)
3074 		tracing_start_cmdline_record();
3075 }
3076 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3077 
trace_printk_start_comm(void)3078 void trace_printk_start_comm(void)
3079 {
3080 	/* Start tracing comms if trace printk is set */
3081 	if (!buffers_allocated)
3082 		return;
3083 	tracing_start_cmdline_record();
3084 }
3085 
trace_printk_start_stop_comm(int enabled)3086 static void trace_printk_start_stop_comm(int enabled)
3087 {
3088 	if (!buffers_allocated)
3089 		return;
3090 
3091 	if (enabled)
3092 		tracing_start_cmdline_record();
3093 	else
3094 		tracing_stop_cmdline_record();
3095 }
3096 
3097 /**
3098  * trace_vbprintk - write binary msg to tracing buffer
3099  * @ip:    The address of the caller
3100  * @fmt:   The string format to write to the buffer
3101  * @args:  Arguments for @fmt
3102  */
trace_vbprintk(unsigned long ip,const char * fmt,va_list args)3103 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3104 {
3105 	struct trace_event_call *call = &event_bprint;
3106 	struct ring_buffer_event *event;
3107 	struct ring_buffer *buffer;
3108 	struct trace_array *tr = &global_trace;
3109 	struct bprint_entry *entry;
3110 	unsigned long flags;
3111 	char *tbuffer;
3112 	int len = 0, size, pc;
3113 
3114 	if (unlikely(tracing_selftest_running || tracing_disabled))
3115 		return 0;
3116 
3117 	/* Don't pollute graph traces with trace_vprintk internals */
3118 	pause_graph_tracing();
3119 
3120 	pc = preempt_count();
3121 	preempt_disable_notrace();
3122 
3123 	tbuffer = get_trace_buf();
3124 	if (!tbuffer) {
3125 		len = 0;
3126 		goto out_nobuffer;
3127 	}
3128 
3129 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3130 
3131 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3132 		goto out;
3133 
3134 	local_save_flags(flags);
3135 	size = sizeof(*entry) + sizeof(u32) * len;
3136 	buffer = tr->trace_buffer.buffer;
3137 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3138 					    flags, pc);
3139 	if (!event)
3140 		goto out;
3141 	entry = ring_buffer_event_data(event);
3142 	entry->ip			= ip;
3143 	entry->fmt			= fmt;
3144 
3145 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3146 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3147 		__buffer_unlock_commit(buffer, event);
3148 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3149 	}
3150 
3151 out:
3152 	put_trace_buf();
3153 
3154 out_nobuffer:
3155 	preempt_enable_notrace();
3156 	unpause_graph_tracing();
3157 
3158 	return len;
3159 }
3160 EXPORT_SYMBOL_GPL(trace_vbprintk);
3161 
3162 __printf(3, 0)
3163 static int
__trace_array_vprintk(struct ring_buffer * buffer,unsigned long ip,const char * fmt,va_list args)3164 __trace_array_vprintk(struct ring_buffer *buffer,
3165 		      unsigned long ip, const char *fmt, va_list args)
3166 {
3167 	struct trace_event_call *call = &event_print;
3168 	struct ring_buffer_event *event;
3169 	int len = 0, size, pc;
3170 	struct print_entry *entry;
3171 	unsigned long flags;
3172 	char *tbuffer;
3173 
3174 	if (tracing_disabled || tracing_selftest_running)
3175 		return 0;
3176 
3177 	/* Don't pollute graph traces with trace_vprintk internals */
3178 	pause_graph_tracing();
3179 
3180 	pc = preempt_count();
3181 	preempt_disable_notrace();
3182 
3183 
3184 	tbuffer = get_trace_buf();
3185 	if (!tbuffer) {
3186 		len = 0;
3187 		goto out_nobuffer;
3188 	}
3189 
3190 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3191 
3192 	local_save_flags(flags);
3193 	size = sizeof(*entry) + len + 1;
3194 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3195 					    flags, pc);
3196 	if (!event)
3197 		goto out;
3198 	entry = ring_buffer_event_data(event);
3199 	entry->ip = ip;
3200 
3201 	memcpy(&entry->buf, tbuffer, len + 1);
3202 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3203 		__buffer_unlock_commit(buffer, event);
3204 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3205 	}
3206 
3207 out:
3208 	put_trace_buf();
3209 
3210 out_nobuffer:
3211 	preempt_enable_notrace();
3212 	unpause_graph_tracing();
3213 
3214 	return len;
3215 }
3216 
3217 __printf(3, 0)
trace_array_vprintk(struct trace_array * tr,unsigned long ip,const char * fmt,va_list args)3218 int trace_array_vprintk(struct trace_array *tr,
3219 			unsigned long ip, const char *fmt, va_list args)
3220 {
3221 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3222 }
3223 
3224 __printf(3, 0)
trace_array_printk(struct trace_array * tr,unsigned long ip,const char * fmt,...)3225 int trace_array_printk(struct trace_array *tr,
3226 		       unsigned long ip, const char *fmt, ...)
3227 {
3228 	int ret;
3229 	va_list ap;
3230 
3231 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3232 		return 0;
3233 
3234 	va_start(ap, fmt);
3235 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3236 	va_end(ap);
3237 	return ret;
3238 }
3239 EXPORT_SYMBOL_GPL(trace_array_printk);
3240 
3241 __printf(3, 4)
trace_array_printk_buf(struct ring_buffer * buffer,unsigned long ip,const char * fmt,...)3242 int trace_array_printk_buf(struct ring_buffer *buffer,
3243 			   unsigned long ip, const char *fmt, ...)
3244 {
3245 	int ret;
3246 	va_list ap;
3247 
3248 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3249 		return 0;
3250 
3251 	va_start(ap, fmt);
3252 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3253 	va_end(ap);
3254 	return ret;
3255 }
3256 
3257 __printf(2, 0)
trace_vprintk(unsigned long ip,const char * fmt,va_list args)3258 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3259 {
3260 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3261 }
3262 EXPORT_SYMBOL_GPL(trace_vprintk);
3263 
trace_iterator_increment(struct trace_iterator * iter)3264 static void trace_iterator_increment(struct trace_iterator *iter)
3265 {
3266 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3267 
3268 	iter->idx++;
3269 	if (buf_iter)
3270 		ring_buffer_read(buf_iter, NULL);
3271 }
3272 
3273 static struct trace_entry *
peek_next_entry(struct trace_iterator * iter,int cpu,u64 * ts,unsigned long * lost_events)3274 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3275 		unsigned long *lost_events)
3276 {
3277 	struct ring_buffer_event *event;
3278 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3279 
3280 	if (buf_iter)
3281 		event = ring_buffer_iter_peek(buf_iter, ts);
3282 	else
3283 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3284 					 lost_events);
3285 
3286 	if (event) {
3287 		iter->ent_size = ring_buffer_event_length(event);
3288 		return ring_buffer_event_data(event);
3289 	}
3290 	iter->ent_size = 0;
3291 	return NULL;
3292 }
3293 
3294 static struct trace_entry *
__find_next_entry(struct trace_iterator * iter,int * ent_cpu,unsigned long * missing_events,u64 * ent_ts)3295 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3296 		  unsigned long *missing_events, u64 *ent_ts)
3297 {
3298 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3299 	struct trace_entry *ent, *next = NULL;
3300 	unsigned long lost_events = 0, next_lost = 0;
3301 	int cpu_file = iter->cpu_file;
3302 	u64 next_ts = 0, ts;
3303 	int next_cpu = -1;
3304 	int next_size = 0;
3305 	int cpu;
3306 
3307 	/*
3308 	 * If we are in a per_cpu trace file, don't bother by iterating over
3309 	 * all cpu and peek directly.
3310 	 */
3311 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3312 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3313 			return NULL;
3314 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3315 		if (ent_cpu)
3316 			*ent_cpu = cpu_file;
3317 
3318 		return ent;
3319 	}
3320 
3321 	for_each_tracing_cpu(cpu) {
3322 
3323 		if (ring_buffer_empty_cpu(buffer, cpu))
3324 			continue;
3325 
3326 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3327 
3328 		/*
3329 		 * Pick the entry with the smallest timestamp:
3330 		 */
3331 		if (ent && (!next || ts < next_ts)) {
3332 			next = ent;
3333 			next_cpu = cpu;
3334 			next_ts = ts;
3335 			next_lost = lost_events;
3336 			next_size = iter->ent_size;
3337 		}
3338 	}
3339 
3340 	iter->ent_size = next_size;
3341 
3342 	if (ent_cpu)
3343 		*ent_cpu = next_cpu;
3344 
3345 	if (ent_ts)
3346 		*ent_ts = next_ts;
3347 
3348 	if (missing_events)
3349 		*missing_events = next_lost;
3350 
3351 	return next;
3352 }
3353 
3354 /* Find the next real entry, without updating the iterator itself */
trace_find_next_entry(struct trace_iterator * iter,int * ent_cpu,u64 * ent_ts)3355 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3356 					  int *ent_cpu, u64 *ent_ts)
3357 {
3358 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3359 }
3360 
3361 /* Find the next real entry, and increment the iterator to the next entry */
trace_find_next_entry_inc(struct trace_iterator * iter)3362 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3363 {
3364 	iter->ent = __find_next_entry(iter, &iter->cpu,
3365 				      &iter->lost_events, &iter->ts);
3366 
3367 	if (iter->ent)
3368 		trace_iterator_increment(iter);
3369 
3370 	return iter->ent ? iter : NULL;
3371 }
3372 
trace_consume(struct trace_iterator * iter)3373 static void trace_consume(struct trace_iterator *iter)
3374 {
3375 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3376 			    &iter->lost_events);
3377 }
3378 
s_next(struct seq_file * m,void * v,loff_t * pos)3379 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3380 {
3381 	struct trace_iterator *iter = m->private;
3382 	int i = (int)*pos;
3383 	void *ent;
3384 
3385 	WARN_ON_ONCE(iter->leftover);
3386 
3387 	(*pos)++;
3388 
3389 	/* can't go backwards */
3390 	if (iter->idx > i)
3391 		return NULL;
3392 
3393 	if (iter->idx < 0)
3394 		ent = trace_find_next_entry_inc(iter);
3395 	else
3396 		ent = iter;
3397 
3398 	while (ent && iter->idx < i)
3399 		ent = trace_find_next_entry_inc(iter);
3400 
3401 	iter->pos = *pos;
3402 
3403 	return ent;
3404 }
3405 
tracing_iter_reset(struct trace_iterator * iter,int cpu)3406 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3407 {
3408 	struct ring_buffer_event *event;
3409 	struct ring_buffer_iter *buf_iter;
3410 	unsigned long entries = 0;
3411 	u64 ts;
3412 
3413 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3414 
3415 	buf_iter = trace_buffer_iter(iter, cpu);
3416 	if (!buf_iter)
3417 		return;
3418 
3419 	ring_buffer_iter_reset(buf_iter);
3420 
3421 	/*
3422 	 * We could have the case with the max latency tracers
3423 	 * that a reset never took place on a cpu. This is evident
3424 	 * by the timestamp being before the start of the buffer.
3425 	 */
3426 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3427 		if (ts >= iter->trace_buffer->time_start)
3428 			break;
3429 		entries++;
3430 		ring_buffer_read(buf_iter, NULL);
3431 	}
3432 
3433 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3434 }
3435 
3436 /*
3437  * The current tracer is copied to avoid a global locking
3438  * all around.
3439  */
s_start(struct seq_file * m,loff_t * pos)3440 static void *s_start(struct seq_file *m, loff_t *pos)
3441 {
3442 	struct trace_iterator *iter = m->private;
3443 	struct trace_array *tr = iter->tr;
3444 	int cpu_file = iter->cpu_file;
3445 	void *p = NULL;
3446 	loff_t l = 0;
3447 	int cpu;
3448 
3449 	/*
3450 	 * copy the tracer to avoid using a global lock all around.
3451 	 * iter->trace is a copy of current_trace, the pointer to the
3452 	 * name may be used instead of a strcmp(), as iter->trace->name
3453 	 * will point to the same string as current_trace->name.
3454 	 */
3455 	mutex_lock(&trace_types_lock);
3456 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3457 		*iter->trace = *tr->current_trace;
3458 	mutex_unlock(&trace_types_lock);
3459 
3460 #ifdef CONFIG_TRACER_MAX_TRACE
3461 	if (iter->snapshot && iter->trace->use_max_tr)
3462 		return ERR_PTR(-EBUSY);
3463 #endif
3464 
3465 	if (!iter->snapshot)
3466 		atomic_inc(&trace_record_taskinfo_disabled);
3467 
3468 	if (*pos != iter->pos) {
3469 		iter->ent = NULL;
3470 		iter->cpu = 0;
3471 		iter->idx = -1;
3472 
3473 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3474 			for_each_tracing_cpu(cpu)
3475 				tracing_iter_reset(iter, cpu);
3476 		} else
3477 			tracing_iter_reset(iter, cpu_file);
3478 
3479 		iter->leftover = 0;
3480 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3481 			;
3482 
3483 	} else {
3484 		/*
3485 		 * If we overflowed the seq_file before, then we want
3486 		 * to just reuse the trace_seq buffer again.
3487 		 */
3488 		if (iter->leftover)
3489 			p = iter;
3490 		else {
3491 			l = *pos - 1;
3492 			p = s_next(m, p, &l);
3493 		}
3494 	}
3495 
3496 	trace_event_read_lock();
3497 	trace_access_lock(cpu_file);
3498 	return p;
3499 }
3500 
s_stop(struct seq_file * m,void * p)3501 static void s_stop(struct seq_file *m, void *p)
3502 {
3503 	struct trace_iterator *iter = m->private;
3504 
3505 #ifdef CONFIG_TRACER_MAX_TRACE
3506 	if (iter->snapshot && iter->trace->use_max_tr)
3507 		return;
3508 #endif
3509 
3510 	if (!iter->snapshot)
3511 		atomic_dec(&trace_record_taskinfo_disabled);
3512 
3513 	trace_access_unlock(iter->cpu_file);
3514 	trace_event_read_unlock();
3515 }
3516 
3517 static void
get_total_entries_cpu(struct trace_buffer * buf,unsigned long * total,unsigned long * entries,int cpu)3518 get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total,
3519 		      unsigned long *entries, int cpu)
3520 {
3521 	unsigned long count;
3522 
3523 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3524 	/*
3525 	 * If this buffer has skipped entries, then we hold all
3526 	 * entries for the trace and we need to ignore the
3527 	 * ones before the time stamp.
3528 	 */
3529 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3530 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3531 		/* total is the same as the entries */
3532 		*total = count;
3533 	} else
3534 		*total = count +
3535 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3536 	*entries = count;
3537 }
3538 
3539 static void
get_total_entries(struct trace_buffer * buf,unsigned long * total,unsigned long * entries)3540 get_total_entries(struct trace_buffer *buf,
3541 		  unsigned long *total, unsigned long *entries)
3542 {
3543 	unsigned long t, e;
3544 	int cpu;
3545 
3546 	*total = 0;
3547 	*entries = 0;
3548 
3549 	for_each_tracing_cpu(cpu) {
3550 		get_total_entries_cpu(buf, &t, &e, cpu);
3551 		*total += t;
3552 		*entries += e;
3553 	}
3554 }
3555 
trace_total_entries_cpu(struct trace_array * tr,int cpu)3556 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3557 {
3558 	unsigned long total, entries;
3559 
3560 	if (!tr)
3561 		tr = &global_trace;
3562 
3563 	get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu);
3564 
3565 	return entries;
3566 }
3567 
trace_total_entries(struct trace_array * tr)3568 unsigned long trace_total_entries(struct trace_array *tr)
3569 {
3570 	unsigned long total, entries;
3571 
3572 	if (!tr)
3573 		tr = &global_trace;
3574 
3575 	get_total_entries(&tr->trace_buffer, &total, &entries);
3576 
3577 	return entries;
3578 }
3579 
print_lat_help_header(struct seq_file * m)3580 static void print_lat_help_header(struct seq_file *m)
3581 {
3582 	seq_puts(m, "#                  _------=> CPU#            \n"
3583 		    "#                 / _-----=> irqs-off        \n"
3584 		    "#                | / _----=> need-resched    \n"
3585 		    "#                || / _---=> hardirq/softirq \n"
3586 		    "#                ||| / _--=> preempt-depth   \n"
3587 		    "#                |||| /     delay            \n"
3588 		    "#  cmd     pid   ||||| time  |   caller      \n"
3589 		    "#     \\   /      |||||  \\    |   /         \n");
3590 }
3591 
print_event_info(struct trace_buffer * buf,struct seq_file * m)3592 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3593 {
3594 	unsigned long total;
3595 	unsigned long entries;
3596 
3597 	get_total_entries(buf, &total, &entries);
3598 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3599 		   entries, total, num_online_cpus());
3600 	seq_puts(m, "#\n");
3601 }
3602 
print_func_help_header(struct trace_buffer * buf,struct seq_file * m,unsigned int flags)3603 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3604 				   unsigned int flags)
3605 {
3606 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3607 
3608 	print_event_info(buf, m);
3609 
3610 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3611 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3612 }
3613 
print_func_help_header_irq(struct trace_buffer * buf,struct seq_file * m,unsigned int flags)3614 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3615 				       unsigned int flags)
3616 {
3617 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3618 	const char *space = "          ";
3619 	int prec = tgid ? 10 : 2;
3620 
3621 	print_event_info(buf, m);
3622 
3623 	seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3624 	seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3625 	seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3626 	seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3627 	seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3628 	seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3629 	seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3630 }
3631 
3632 void
print_trace_header(struct seq_file * m,struct trace_iterator * iter)3633 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3634 {
3635 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3636 	struct trace_buffer *buf = iter->trace_buffer;
3637 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3638 	struct tracer *type = iter->trace;
3639 	unsigned long entries;
3640 	unsigned long total;
3641 	const char *name = "preemption";
3642 
3643 	name = type->name;
3644 
3645 	get_total_entries(buf, &total, &entries);
3646 
3647 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3648 		   name, UTS_RELEASE);
3649 	seq_puts(m, "# -----------------------------------"
3650 		 "---------------------------------\n");
3651 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3652 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3653 		   nsecs_to_usecs(data->saved_latency),
3654 		   entries,
3655 		   total,
3656 		   buf->cpu,
3657 #if defined(CONFIG_PREEMPT_NONE)
3658 		   "server",
3659 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3660 		   "desktop",
3661 #elif defined(CONFIG_PREEMPT)
3662 		   "preempt",
3663 #else
3664 		   "unknown",
3665 #endif
3666 		   /* These are reserved for later use */
3667 		   0, 0, 0, 0);
3668 #ifdef CONFIG_SMP
3669 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3670 #else
3671 	seq_puts(m, ")\n");
3672 #endif
3673 	seq_puts(m, "#    -----------------\n");
3674 	seq_printf(m, "#    | task: %.16s-%d "
3675 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3676 		   data->comm, data->pid,
3677 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3678 		   data->policy, data->rt_priority);
3679 	seq_puts(m, "#    -----------------\n");
3680 
3681 	if (data->critical_start) {
3682 		seq_puts(m, "#  => started at: ");
3683 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3684 		trace_print_seq(m, &iter->seq);
3685 		seq_puts(m, "\n#  => ended at:   ");
3686 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3687 		trace_print_seq(m, &iter->seq);
3688 		seq_puts(m, "\n#\n");
3689 	}
3690 
3691 	seq_puts(m, "#\n");
3692 }
3693 
test_cpu_buff_start(struct trace_iterator * iter)3694 static void test_cpu_buff_start(struct trace_iterator *iter)
3695 {
3696 	struct trace_seq *s = &iter->seq;
3697 	struct trace_array *tr = iter->tr;
3698 
3699 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3700 		return;
3701 
3702 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3703 		return;
3704 
3705 	if (cpumask_available(iter->started) &&
3706 	    cpumask_test_cpu(iter->cpu, iter->started))
3707 		return;
3708 
3709 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3710 		return;
3711 
3712 	if (cpumask_available(iter->started))
3713 		cpumask_set_cpu(iter->cpu, iter->started);
3714 
3715 	/* Don't print started cpu buffer for the first entry of the trace */
3716 	if (iter->idx > 1)
3717 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3718 				iter->cpu);
3719 }
3720 
print_trace_fmt(struct trace_iterator * iter)3721 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3722 {
3723 	struct trace_array *tr = iter->tr;
3724 	struct trace_seq *s = &iter->seq;
3725 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3726 	struct trace_entry *entry;
3727 	struct trace_event *event;
3728 
3729 	entry = iter->ent;
3730 
3731 	test_cpu_buff_start(iter);
3732 
3733 	event = ftrace_find_event(entry->type);
3734 
3735 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3736 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3737 			trace_print_lat_context(iter);
3738 		else
3739 			trace_print_context(iter);
3740 	}
3741 
3742 	if (trace_seq_has_overflowed(s))
3743 		return TRACE_TYPE_PARTIAL_LINE;
3744 
3745 	if (event)
3746 		return event->funcs->trace(iter, sym_flags, event);
3747 
3748 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3749 
3750 	return trace_handle_return(s);
3751 }
3752 
print_raw_fmt(struct trace_iterator * iter)3753 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3754 {
3755 	struct trace_array *tr = iter->tr;
3756 	struct trace_seq *s = &iter->seq;
3757 	struct trace_entry *entry;
3758 	struct trace_event *event;
3759 
3760 	entry = iter->ent;
3761 
3762 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3763 		trace_seq_printf(s, "%d %d %llu ",
3764 				 entry->pid, iter->cpu, iter->ts);
3765 
3766 	if (trace_seq_has_overflowed(s))
3767 		return TRACE_TYPE_PARTIAL_LINE;
3768 
3769 	event = ftrace_find_event(entry->type);
3770 	if (event)
3771 		return event->funcs->raw(iter, 0, event);
3772 
3773 	trace_seq_printf(s, "%d ?\n", entry->type);
3774 
3775 	return trace_handle_return(s);
3776 }
3777 
print_hex_fmt(struct trace_iterator * iter)3778 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3779 {
3780 	struct trace_array *tr = iter->tr;
3781 	struct trace_seq *s = &iter->seq;
3782 	unsigned char newline = '\n';
3783 	struct trace_entry *entry;
3784 	struct trace_event *event;
3785 
3786 	entry = iter->ent;
3787 
3788 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3789 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3790 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3791 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3792 		if (trace_seq_has_overflowed(s))
3793 			return TRACE_TYPE_PARTIAL_LINE;
3794 	}
3795 
3796 	event = ftrace_find_event(entry->type);
3797 	if (event) {
3798 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3799 		if (ret != TRACE_TYPE_HANDLED)
3800 			return ret;
3801 	}
3802 
3803 	SEQ_PUT_FIELD(s, newline);
3804 
3805 	return trace_handle_return(s);
3806 }
3807 
print_bin_fmt(struct trace_iterator * iter)3808 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3809 {
3810 	struct trace_array *tr = iter->tr;
3811 	struct trace_seq *s = &iter->seq;
3812 	struct trace_entry *entry;
3813 	struct trace_event *event;
3814 
3815 	entry = iter->ent;
3816 
3817 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3818 		SEQ_PUT_FIELD(s, entry->pid);
3819 		SEQ_PUT_FIELD(s, iter->cpu);
3820 		SEQ_PUT_FIELD(s, iter->ts);
3821 		if (trace_seq_has_overflowed(s))
3822 			return TRACE_TYPE_PARTIAL_LINE;
3823 	}
3824 
3825 	event = ftrace_find_event(entry->type);
3826 	return event ? event->funcs->binary(iter, 0, event) :
3827 		TRACE_TYPE_HANDLED;
3828 }
3829 
trace_empty(struct trace_iterator * iter)3830 int trace_empty(struct trace_iterator *iter)
3831 {
3832 	struct ring_buffer_iter *buf_iter;
3833 	int cpu;
3834 
3835 	/* If we are looking at one CPU buffer, only check that one */
3836 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3837 		cpu = iter->cpu_file;
3838 		buf_iter = trace_buffer_iter(iter, cpu);
3839 		if (buf_iter) {
3840 			if (!ring_buffer_iter_empty(buf_iter))
3841 				return 0;
3842 		} else {
3843 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3844 				return 0;
3845 		}
3846 		return 1;
3847 	}
3848 
3849 	for_each_tracing_cpu(cpu) {
3850 		buf_iter = trace_buffer_iter(iter, cpu);
3851 		if (buf_iter) {
3852 			if (!ring_buffer_iter_empty(buf_iter))
3853 				return 0;
3854 		} else {
3855 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3856 				return 0;
3857 		}
3858 	}
3859 
3860 	return 1;
3861 }
3862 
3863 /*  Called with trace_event_read_lock() held. */
print_trace_line(struct trace_iterator * iter)3864 enum print_line_t print_trace_line(struct trace_iterator *iter)
3865 {
3866 	struct trace_array *tr = iter->tr;
3867 	unsigned long trace_flags = tr->trace_flags;
3868 	enum print_line_t ret;
3869 
3870 	if (iter->lost_events) {
3871 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3872 				 iter->cpu, iter->lost_events);
3873 		if (trace_seq_has_overflowed(&iter->seq))
3874 			return TRACE_TYPE_PARTIAL_LINE;
3875 	}
3876 
3877 	if (iter->trace && iter->trace->print_line) {
3878 		ret = iter->trace->print_line(iter);
3879 		if (ret != TRACE_TYPE_UNHANDLED)
3880 			return ret;
3881 	}
3882 
3883 	if (iter->ent->type == TRACE_BPUTS &&
3884 			trace_flags & TRACE_ITER_PRINTK &&
3885 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3886 		return trace_print_bputs_msg_only(iter);
3887 
3888 	if (iter->ent->type == TRACE_BPRINT &&
3889 			trace_flags & TRACE_ITER_PRINTK &&
3890 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3891 		return trace_print_bprintk_msg_only(iter);
3892 
3893 	if (iter->ent->type == TRACE_PRINT &&
3894 			trace_flags & TRACE_ITER_PRINTK &&
3895 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3896 		return trace_print_printk_msg_only(iter);
3897 
3898 	if (trace_flags & TRACE_ITER_BIN)
3899 		return print_bin_fmt(iter);
3900 
3901 	if (trace_flags & TRACE_ITER_HEX)
3902 		return print_hex_fmt(iter);
3903 
3904 	if (trace_flags & TRACE_ITER_RAW)
3905 		return print_raw_fmt(iter);
3906 
3907 	return print_trace_fmt(iter);
3908 }
3909 
trace_latency_header(struct seq_file * m)3910 void trace_latency_header(struct seq_file *m)
3911 {
3912 	struct trace_iterator *iter = m->private;
3913 	struct trace_array *tr = iter->tr;
3914 
3915 	/* print nothing if the buffers are empty */
3916 	if (trace_empty(iter))
3917 		return;
3918 
3919 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3920 		print_trace_header(m, iter);
3921 
3922 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3923 		print_lat_help_header(m);
3924 }
3925 
trace_default_header(struct seq_file * m)3926 void trace_default_header(struct seq_file *m)
3927 {
3928 	struct trace_iterator *iter = m->private;
3929 	struct trace_array *tr = iter->tr;
3930 	unsigned long trace_flags = tr->trace_flags;
3931 
3932 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3933 		return;
3934 
3935 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3936 		/* print nothing if the buffers are empty */
3937 		if (trace_empty(iter))
3938 			return;
3939 		print_trace_header(m, iter);
3940 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3941 			print_lat_help_header(m);
3942 	} else {
3943 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3944 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3945 				print_func_help_header_irq(iter->trace_buffer,
3946 							   m, trace_flags);
3947 			else
3948 				print_func_help_header(iter->trace_buffer, m,
3949 						       trace_flags);
3950 		}
3951 	}
3952 }
3953 
test_ftrace_alive(struct seq_file * m)3954 static void test_ftrace_alive(struct seq_file *m)
3955 {
3956 	if (!ftrace_is_dead())
3957 		return;
3958 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3959 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3960 }
3961 
3962 #ifdef CONFIG_TRACER_MAX_TRACE
show_snapshot_main_help(struct seq_file * m)3963 static void show_snapshot_main_help(struct seq_file *m)
3964 {
3965 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3966 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3967 		    "#                      Takes a snapshot of the main buffer.\n"
3968 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3969 		    "#                      (Doesn't have to be '2' works with any number that\n"
3970 		    "#                       is not a '0' or '1')\n");
3971 }
3972 
show_snapshot_percpu_help(struct seq_file * m)3973 static void show_snapshot_percpu_help(struct seq_file *m)
3974 {
3975 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3976 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3977 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3978 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3979 #else
3980 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3981 		    "#                     Must use main snapshot file to allocate.\n");
3982 #endif
3983 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3984 		    "#                      (Doesn't have to be '2' works with any number that\n"
3985 		    "#                       is not a '0' or '1')\n");
3986 }
3987 
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)3988 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3989 {
3990 	if (iter->tr->allocated_snapshot)
3991 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3992 	else
3993 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3994 
3995 	seq_puts(m, "# Snapshot commands:\n");
3996 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3997 		show_snapshot_main_help(m);
3998 	else
3999 		show_snapshot_percpu_help(m);
4000 }
4001 #else
4002 /* Should never be called */
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)4003 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4004 #endif
4005 
s_show(struct seq_file * m,void * v)4006 static int s_show(struct seq_file *m, void *v)
4007 {
4008 	struct trace_iterator *iter = v;
4009 	int ret;
4010 
4011 	if (iter->ent == NULL) {
4012 		if (iter->tr) {
4013 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4014 			seq_puts(m, "#\n");
4015 			test_ftrace_alive(m);
4016 		}
4017 		if (iter->snapshot && trace_empty(iter))
4018 			print_snapshot_help(m, iter);
4019 		else if (iter->trace && iter->trace->print_header)
4020 			iter->trace->print_header(m);
4021 		else
4022 			trace_default_header(m);
4023 
4024 	} else if (iter->leftover) {
4025 		/*
4026 		 * If we filled the seq_file buffer earlier, we
4027 		 * want to just show it now.
4028 		 */
4029 		ret = trace_print_seq(m, &iter->seq);
4030 
4031 		/* ret should this time be zero, but you never know */
4032 		iter->leftover = ret;
4033 
4034 	} else {
4035 		print_trace_line(iter);
4036 		ret = trace_print_seq(m, &iter->seq);
4037 		/*
4038 		 * If we overflow the seq_file buffer, then it will
4039 		 * ask us for this data again at start up.
4040 		 * Use that instead.
4041 		 *  ret is 0 if seq_file write succeeded.
4042 		 *        -1 otherwise.
4043 		 */
4044 		iter->leftover = ret;
4045 	}
4046 
4047 	return 0;
4048 }
4049 
4050 /*
4051  * Should be used after trace_array_get(), trace_types_lock
4052  * ensures that i_cdev was already initialized.
4053  */
tracing_get_cpu(struct inode * inode)4054 static inline int tracing_get_cpu(struct inode *inode)
4055 {
4056 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4057 		return (long)inode->i_cdev - 1;
4058 	return RING_BUFFER_ALL_CPUS;
4059 }
4060 
4061 static const struct seq_operations tracer_seq_ops = {
4062 	.start		= s_start,
4063 	.next		= s_next,
4064 	.stop		= s_stop,
4065 	.show		= s_show,
4066 };
4067 
4068 static struct trace_iterator *
__tracing_open(struct inode * inode,struct file * file,bool snapshot)4069 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4070 {
4071 	struct trace_array *tr = inode->i_private;
4072 	struct trace_iterator *iter;
4073 	int cpu;
4074 
4075 	if (tracing_disabled)
4076 		return ERR_PTR(-ENODEV);
4077 
4078 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4079 	if (!iter)
4080 		return ERR_PTR(-ENOMEM);
4081 
4082 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4083 				    GFP_KERNEL);
4084 	if (!iter->buffer_iter)
4085 		goto release;
4086 
4087 	/*
4088 	 * We make a copy of the current tracer to avoid concurrent
4089 	 * changes on it while we are reading.
4090 	 */
4091 	mutex_lock(&trace_types_lock);
4092 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4093 	if (!iter->trace)
4094 		goto fail;
4095 
4096 	*iter->trace = *tr->current_trace;
4097 
4098 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4099 		goto fail;
4100 
4101 	iter->tr = tr;
4102 
4103 #ifdef CONFIG_TRACER_MAX_TRACE
4104 	/* Currently only the top directory has a snapshot */
4105 	if (tr->current_trace->print_max || snapshot)
4106 		iter->trace_buffer = &tr->max_buffer;
4107 	else
4108 #endif
4109 		iter->trace_buffer = &tr->trace_buffer;
4110 	iter->snapshot = snapshot;
4111 	iter->pos = -1;
4112 	iter->cpu_file = tracing_get_cpu(inode);
4113 	mutex_init(&iter->mutex);
4114 
4115 	/* Notify the tracer early; before we stop tracing. */
4116 	if (iter->trace && iter->trace->open)
4117 		iter->trace->open(iter);
4118 
4119 	/* Annotate start of buffers if we had overruns */
4120 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
4121 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4122 
4123 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4124 	if (trace_clocks[tr->clock_id].in_ns)
4125 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4126 
4127 	/* stop the trace while dumping if we are not opening "snapshot" */
4128 	if (!iter->snapshot)
4129 		tracing_stop_tr(tr);
4130 
4131 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4132 		for_each_tracing_cpu(cpu) {
4133 			iter->buffer_iter[cpu] =
4134 				ring_buffer_read_prepare(iter->trace_buffer->buffer,
4135 							 cpu, GFP_KERNEL);
4136 		}
4137 		ring_buffer_read_prepare_sync();
4138 		for_each_tracing_cpu(cpu) {
4139 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4140 			tracing_iter_reset(iter, cpu);
4141 		}
4142 	} else {
4143 		cpu = iter->cpu_file;
4144 		iter->buffer_iter[cpu] =
4145 			ring_buffer_read_prepare(iter->trace_buffer->buffer,
4146 						 cpu, GFP_KERNEL);
4147 		ring_buffer_read_prepare_sync();
4148 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4149 		tracing_iter_reset(iter, cpu);
4150 	}
4151 
4152 	mutex_unlock(&trace_types_lock);
4153 
4154 	return iter;
4155 
4156  fail:
4157 	mutex_unlock(&trace_types_lock);
4158 	kfree(iter->trace);
4159 	kfree(iter->buffer_iter);
4160 release:
4161 	seq_release_private(inode, file);
4162 	return ERR_PTR(-ENOMEM);
4163 }
4164 
tracing_open_generic(struct inode * inode,struct file * filp)4165 int tracing_open_generic(struct inode *inode, struct file *filp)
4166 {
4167 	int ret;
4168 
4169 	ret = tracing_check_open_get_tr(NULL);
4170 	if (ret)
4171 		return ret;
4172 
4173 	filp->private_data = inode->i_private;
4174 	return 0;
4175 }
4176 
tracing_is_disabled(void)4177 bool tracing_is_disabled(void)
4178 {
4179 	return (tracing_disabled) ? true: false;
4180 }
4181 
4182 /*
4183  * Open and update trace_array ref count.
4184  * Must have the current trace_array passed to it.
4185  */
tracing_open_generic_tr(struct inode * inode,struct file * filp)4186 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4187 {
4188 	struct trace_array *tr = inode->i_private;
4189 	int ret;
4190 
4191 	ret = tracing_check_open_get_tr(tr);
4192 	if (ret)
4193 		return ret;
4194 
4195 	filp->private_data = inode->i_private;
4196 
4197 	return 0;
4198 }
4199 
tracing_release(struct inode * inode,struct file * file)4200 static int tracing_release(struct inode *inode, struct file *file)
4201 {
4202 	struct trace_array *tr = inode->i_private;
4203 	struct seq_file *m = file->private_data;
4204 	struct trace_iterator *iter;
4205 	int cpu;
4206 
4207 	if (!(file->f_mode & FMODE_READ)) {
4208 		trace_array_put(tr);
4209 		return 0;
4210 	}
4211 
4212 	/* Writes do not use seq_file */
4213 	iter = m->private;
4214 	mutex_lock(&trace_types_lock);
4215 
4216 	for_each_tracing_cpu(cpu) {
4217 		if (iter->buffer_iter[cpu])
4218 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4219 	}
4220 
4221 	if (iter->trace && iter->trace->close)
4222 		iter->trace->close(iter);
4223 
4224 	if (!iter->snapshot)
4225 		/* reenable tracing if it was previously enabled */
4226 		tracing_start_tr(tr);
4227 
4228 	__trace_array_put(tr);
4229 
4230 	mutex_unlock(&trace_types_lock);
4231 
4232 	mutex_destroy(&iter->mutex);
4233 	free_cpumask_var(iter->started);
4234 	kfree(iter->trace);
4235 	kfree(iter->buffer_iter);
4236 	seq_release_private(inode, file);
4237 
4238 	return 0;
4239 }
4240 
tracing_release_generic_tr(struct inode * inode,struct file * file)4241 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4242 {
4243 	struct trace_array *tr = inode->i_private;
4244 
4245 	trace_array_put(tr);
4246 	return 0;
4247 }
4248 
tracing_single_release_tr(struct inode * inode,struct file * file)4249 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4250 {
4251 	struct trace_array *tr = inode->i_private;
4252 
4253 	trace_array_put(tr);
4254 
4255 	return single_release(inode, file);
4256 }
4257 
tracing_open(struct inode * inode,struct file * file)4258 static int tracing_open(struct inode *inode, struct file *file)
4259 {
4260 	struct trace_array *tr = inode->i_private;
4261 	struct trace_iterator *iter;
4262 	int ret;
4263 
4264 	ret = tracing_check_open_get_tr(tr);
4265 	if (ret)
4266 		return ret;
4267 
4268 	/* If this file was open for write, then erase contents */
4269 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4270 		int cpu = tracing_get_cpu(inode);
4271 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4272 
4273 #ifdef CONFIG_TRACER_MAX_TRACE
4274 		if (tr->current_trace->print_max)
4275 			trace_buf = &tr->max_buffer;
4276 #endif
4277 
4278 		if (cpu == RING_BUFFER_ALL_CPUS)
4279 			tracing_reset_online_cpus(trace_buf);
4280 		else
4281 			tracing_reset_cpu(trace_buf, cpu);
4282 	}
4283 
4284 	if (file->f_mode & FMODE_READ) {
4285 		iter = __tracing_open(inode, file, false);
4286 		if (IS_ERR(iter))
4287 			ret = PTR_ERR(iter);
4288 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4289 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4290 	}
4291 
4292 	if (ret < 0)
4293 		trace_array_put(tr);
4294 
4295 	return ret;
4296 }
4297 
4298 /*
4299  * Some tracers are not suitable for instance buffers.
4300  * A tracer is always available for the global array (toplevel)
4301  * or if it explicitly states that it is.
4302  */
4303 static bool
trace_ok_for_array(struct tracer * t,struct trace_array * tr)4304 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4305 {
4306 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4307 }
4308 
4309 /* Find the next tracer that this trace array may use */
4310 static struct tracer *
get_tracer_for_array(struct trace_array * tr,struct tracer * t)4311 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4312 {
4313 	while (t && !trace_ok_for_array(t, tr))
4314 		t = t->next;
4315 
4316 	return t;
4317 }
4318 
4319 static void *
t_next(struct seq_file * m,void * v,loff_t * pos)4320 t_next(struct seq_file *m, void *v, loff_t *pos)
4321 {
4322 	struct trace_array *tr = m->private;
4323 	struct tracer *t = v;
4324 
4325 	(*pos)++;
4326 
4327 	if (t)
4328 		t = get_tracer_for_array(tr, t->next);
4329 
4330 	return t;
4331 }
4332 
t_start(struct seq_file * m,loff_t * pos)4333 static void *t_start(struct seq_file *m, loff_t *pos)
4334 {
4335 	struct trace_array *tr = m->private;
4336 	struct tracer *t;
4337 	loff_t l = 0;
4338 
4339 	mutex_lock(&trace_types_lock);
4340 
4341 	t = get_tracer_for_array(tr, trace_types);
4342 	for (; t && l < *pos; t = t_next(m, t, &l))
4343 			;
4344 
4345 	return t;
4346 }
4347 
t_stop(struct seq_file * m,void * p)4348 static void t_stop(struct seq_file *m, void *p)
4349 {
4350 	mutex_unlock(&trace_types_lock);
4351 }
4352 
t_show(struct seq_file * m,void * v)4353 static int t_show(struct seq_file *m, void *v)
4354 {
4355 	struct tracer *t = v;
4356 
4357 	if (!t)
4358 		return 0;
4359 
4360 	seq_puts(m, t->name);
4361 	if (t->next)
4362 		seq_putc(m, ' ');
4363 	else
4364 		seq_putc(m, '\n');
4365 
4366 	return 0;
4367 }
4368 
4369 static const struct seq_operations show_traces_seq_ops = {
4370 	.start		= t_start,
4371 	.next		= t_next,
4372 	.stop		= t_stop,
4373 	.show		= t_show,
4374 };
4375 
show_traces_open(struct inode * inode,struct file * file)4376 static int show_traces_open(struct inode *inode, struct file *file)
4377 {
4378 	struct trace_array *tr = inode->i_private;
4379 	struct seq_file *m;
4380 	int ret;
4381 
4382 	ret = tracing_check_open_get_tr(tr);
4383 	if (ret)
4384 		return ret;
4385 
4386 	ret = seq_open(file, &show_traces_seq_ops);
4387 	if (ret) {
4388 		trace_array_put(tr);
4389 		return ret;
4390 	}
4391 
4392 	m = file->private_data;
4393 	m->private = tr;
4394 
4395 	return 0;
4396 }
4397 
show_traces_release(struct inode * inode,struct file * file)4398 static int show_traces_release(struct inode *inode, struct file *file)
4399 {
4400 	struct trace_array *tr = inode->i_private;
4401 
4402 	trace_array_put(tr);
4403 	return seq_release(inode, file);
4404 }
4405 
4406 static ssize_t
tracing_write_stub(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)4407 tracing_write_stub(struct file *filp, const char __user *ubuf,
4408 		   size_t count, loff_t *ppos)
4409 {
4410 	return count;
4411 }
4412 
tracing_lseek(struct file * file,loff_t offset,int whence)4413 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4414 {
4415 	int ret;
4416 
4417 	if (file->f_mode & FMODE_READ)
4418 		ret = seq_lseek(file, offset, whence);
4419 	else
4420 		file->f_pos = ret = 0;
4421 
4422 	return ret;
4423 }
4424 
4425 static const struct file_operations tracing_fops = {
4426 	.open		= tracing_open,
4427 	.read		= seq_read,
4428 	.write		= tracing_write_stub,
4429 	.llseek		= tracing_lseek,
4430 	.release	= tracing_release,
4431 };
4432 
4433 static const struct file_operations show_traces_fops = {
4434 	.open		= show_traces_open,
4435 	.read		= seq_read,
4436 	.llseek		= seq_lseek,
4437 	.release	= show_traces_release,
4438 };
4439 
4440 static ssize_t
tracing_cpumask_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)4441 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4442 		     size_t count, loff_t *ppos)
4443 {
4444 	struct trace_array *tr = file_inode(filp)->i_private;
4445 	char *mask_str;
4446 	int len;
4447 
4448 	len = snprintf(NULL, 0, "%*pb\n",
4449 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4450 	mask_str = kmalloc(len, GFP_KERNEL);
4451 	if (!mask_str)
4452 		return -ENOMEM;
4453 
4454 	len = snprintf(mask_str, len, "%*pb\n",
4455 		       cpumask_pr_args(tr->tracing_cpumask));
4456 	if (len >= count) {
4457 		count = -EINVAL;
4458 		goto out_err;
4459 	}
4460 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4461 
4462 out_err:
4463 	kfree(mask_str);
4464 
4465 	return count;
4466 }
4467 
4468 static ssize_t
tracing_cpumask_write(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)4469 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4470 		      size_t count, loff_t *ppos)
4471 {
4472 	struct trace_array *tr = file_inode(filp)->i_private;
4473 	cpumask_var_t tracing_cpumask_new;
4474 	int err, cpu;
4475 
4476 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4477 		return -ENOMEM;
4478 
4479 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4480 	if (err)
4481 		goto err_unlock;
4482 
4483 	local_irq_disable();
4484 	arch_spin_lock(&tr->max_lock);
4485 	for_each_tracing_cpu(cpu) {
4486 		/*
4487 		 * Increase/decrease the disabled counter if we are
4488 		 * about to flip a bit in the cpumask:
4489 		 */
4490 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4491 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4492 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4493 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4494 		}
4495 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4496 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4497 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4498 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4499 		}
4500 	}
4501 	arch_spin_unlock(&tr->max_lock);
4502 	local_irq_enable();
4503 
4504 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4505 	free_cpumask_var(tracing_cpumask_new);
4506 
4507 	return count;
4508 
4509 err_unlock:
4510 	free_cpumask_var(tracing_cpumask_new);
4511 
4512 	return err;
4513 }
4514 
4515 static const struct file_operations tracing_cpumask_fops = {
4516 	.open		= tracing_open_generic_tr,
4517 	.read		= tracing_cpumask_read,
4518 	.write		= tracing_cpumask_write,
4519 	.release	= tracing_release_generic_tr,
4520 	.llseek		= generic_file_llseek,
4521 };
4522 
tracing_trace_options_show(struct seq_file * m,void * v)4523 static int tracing_trace_options_show(struct seq_file *m, void *v)
4524 {
4525 	struct tracer_opt *trace_opts;
4526 	struct trace_array *tr = m->private;
4527 	u32 tracer_flags;
4528 	int i;
4529 
4530 	mutex_lock(&trace_types_lock);
4531 	tracer_flags = tr->current_trace->flags->val;
4532 	trace_opts = tr->current_trace->flags->opts;
4533 
4534 	for (i = 0; trace_options[i]; i++) {
4535 		if (tr->trace_flags & (1 << i))
4536 			seq_printf(m, "%s\n", trace_options[i]);
4537 		else
4538 			seq_printf(m, "no%s\n", trace_options[i]);
4539 	}
4540 
4541 	for (i = 0; trace_opts[i].name; i++) {
4542 		if (tracer_flags & trace_opts[i].bit)
4543 			seq_printf(m, "%s\n", trace_opts[i].name);
4544 		else
4545 			seq_printf(m, "no%s\n", trace_opts[i].name);
4546 	}
4547 	mutex_unlock(&trace_types_lock);
4548 
4549 	return 0;
4550 }
4551 
__set_tracer_option(struct trace_array * tr,struct tracer_flags * tracer_flags,struct tracer_opt * opts,int neg)4552 static int __set_tracer_option(struct trace_array *tr,
4553 			       struct tracer_flags *tracer_flags,
4554 			       struct tracer_opt *opts, int neg)
4555 {
4556 	struct tracer *trace = tracer_flags->trace;
4557 	int ret;
4558 
4559 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4560 	if (ret)
4561 		return ret;
4562 
4563 	if (neg)
4564 		tracer_flags->val &= ~opts->bit;
4565 	else
4566 		tracer_flags->val |= opts->bit;
4567 	return 0;
4568 }
4569 
4570 /* Try to assign a tracer specific option */
set_tracer_option(struct trace_array * tr,char * cmp,int neg)4571 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4572 {
4573 	struct tracer *trace = tr->current_trace;
4574 	struct tracer_flags *tracer_flags = trace->flags;
4575 	struct tracer_opt *opts = NULL;
4576 	int i;
4577 
4578 	for (i = 0; tracer_flags->opts[i].name; i++) {
4579 		opts = &tracer_flags->opts[i];
4580 
4581 		if (strcmp(cmp, opts->name) == 0)
4582 			return __set_tracer_option(tr, trace->flags, opts, neg);
4583 	}
4584 
4585 	return -EINVAL;
4586 }
4587 
4588 /* Some tracers require overwrite to stay enabled */
trace_keep_overwrite(struct tracer * tracer,u32 mask,int set)4589 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4590 {
4591 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4592 		return -1;
4593 
4594 	return 0;
4595 }
4596 
set_tracer_flag(struct trace_array * tr,unsigned int mask,int enabled)4597 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4598 {
4599 	if ((mask == TRACE_ITER_RECORD_TGID) ||
4600 	    (mask == TRACE_ITER_RECORD_CMD))
4601 		lockdep_assert_held(&event_mutex);
4602 
4603 	/* do nothing if flag is already set */
4604 	if (!!(tr->trace_flags & mask) == !!enabled)
4605 		return 0;
4606 
4607 	/* Give the tracer a chance to approve the change */
4608 	if (tr->current_trace->flag_changed)
4609 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4610 			return -EINVAL;
4611 
4612 	if (enabled)
4613 		tr->trace_flags |= mask;
4614 	else
4615 		tr->trace_flags &= ~mask;
4616 
4617 	if (mask == TRACE_ITER_RECORD_CMD)
4618 		trace_event_enable_cmd_record(enabled);
4619 
4620 	if (mask == TRACE_ITER_RECORD_TGID) {
4621 		if (!tgid_map)
4622 			tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4623 					   sizeof(*tgid_map),
4624 					   GFP_KERNEL);
4625 		if (!tgid_map) {
4626 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4627 			return -ENOMEM;
4628 		}
4629 
4630 		trace_event_enable_tgid_record(enabled);
4631 	}
4632 
4633 	if (mask == TRACE_ITER_EVENT_FORK)
4634 		trace_event_follow_fork(tr, enabled);
4635 
4636 	if (mask == TRACE_ITER_FUNC_FORK)
4637 		ftrace_pid_follow_fork(tr, enabled);
4638 
4639 	if (mask == TRACE_ITER_OVERWRITE) {
4640 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4641 #ifdef CONFIG_TRACER_MAX_TRACE
4642 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4643 #endif
4644 	}
4645 
4646 	if (mask == TRACE_ITER_PRINTK) {
4647 		trace_printk_start_stop_comm(enabled);
4648 		trace_printk_control(enabled);
4649 	}
4650 
4651 	return 0;
4652 }
4653 
trace_set_options(struct trace_array * tr,char * option)4654 static int trace_set_options(struct trace_array *tr, char *option)
4655 {
4656 	char *cmp;
4657 	int neg = 0;
4658 	int ret;
4659 	size_t orig_len = strlen(option);
4660 	int len;
4661 
4662 	cmp = strstrip(option);
4663 
4664 	len = str_has_prefix(cmp, "no");
4665 	if (len)
4666 		neg = 1;
4667 
4668 	cmp += len;
4669 
4670 	mutex_lock(&event_mutex);
4671 	mutex_lock(&trace_types_lock);
4672 
4673 	ret = match_string(trace_options, -1, cmp);
4674 	/* If no option could be set, test the specific tracer options */
4675 	if (ret < 0)
4676 		ret = set_tracer_option(tr, cmp, neg);
4677 	else
4678 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4679 
4680 	mutex_unlock(&trace_types_lock);
4681 	mutex_unlock(&event_mutex);
4682 
4683 	/*
4684 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4685 	 * turn it back into a space.
4686 	 */
4687 	if (orig_len > strlen(option))
4688 		option[strlen(option)] = ' ';
4689 
4690 	return ret;
4691 }
4692 
apply_trace_boot_options(void)4693 static void __init apply_trace_boot_options(void)
4694 {
4695 	char *buf = trace_boot_options_buf;
4696 	char *option;
4697 
4698 	while (true) {
4699 		option = strsep(&buf, ",");
4700 
4701 		if (!option)
4702 			break;
4703 
4704 		if (*option)
4705 			trace_set_options(&global_trace, option);
4706 
4707 		/* Put back the comma to allow this to be called again */
4708 		if (buf)
4709 			*(buf - 1) = ',';
4710 	}
4711 }
4712 
4713 static ssize_t
tracing_trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)4714 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4715 			size_t cnt, loff_t *ppos)
4716 {
4717 	struct seq_file *m = filp->private_data;
4718 	struct trace_array *tr = m->private;
4719 	char buf[64];
4720 	int ret;
4721 
4722 	if (cnt >= sizeof(buf))
4723 		return -EINVAL;
4724 
4725 	if (copy_from_user(buf, ubuf, cnt))
4726 		return -EFAULT;
4727 
4728 	buf[cnt] = 0;
4729 
4730 	ret = trace_set_options(tr, buf);
4731 	if (ret < 0)
4732 		return ret;
4733 
4734 	*ppos += cnt;
4735 
4736 	return cnt;
4737 }
4738 
tracing_trace_options_open(struct inode * inode,struct file * file)4739 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4740 {
4741 	struct trace_array *tr = inode->i_private;
4742 	int ret;
4743 
4744 	ret = tracing_check_open_get_tr(tr);
4745 	if (ret)
4746 		return ret;
4747 
4748 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4749 	if (ret < 0)
4750 		trace_array_put(tr);
4751 
4752 	return ret;
4753 }
4754 
4755 static const struct file_operations tracing_iter_fops = {
4756 	.open		= tracing_trace_options_open,
4757 	.read		= seq_read,
4758 	.llseek		= seq_lseek,
4759 	.release	= tracing_single_release_tr,
4760 	.write		= tracing_trace_options_write,
4761 };
4762 
4763 static const char readme_msg[] =
4764 	"tracing mini-HOWTO:\n\n"
4765 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4766 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4767 	" Important files:\n"
4768 	"  trace\t\t\t- The static contents of the buffer\n"
4769 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4770 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4771 	"  current_tracer\t- function and latency tracers\n"
4772 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4773 	"  error_log\t- error log for failed commands (that support it)\n"
4774 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4775 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4776 	"  trace_clock\t\t-change the clock used to order events\n"
4777 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4778 	"      global:   Synced across CPUs but slows tracing down.\n"
4779 	"     counter:   Not a clock, but just an increment\n"
4780 	"      uptime:   Jiffy counter from time of boot\n"
4781 	"        perf:   Same clock that perf events use\n"
4782 #ifdef CONFIG_X86_64
4783 	"     x86-tsc:   TSC cycle counter\n"
4784 #endif
4785 	"\n  timestamp_mode\t-view the mode used to timestamp events\n"
4786 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4787 	"    absolute:   Absolute (standalone) timestamp\n"
4788 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4789 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4790 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4791 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4792 	"\t\t\t  Remove sub-buffer with rmdir\n"
4793 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4794 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4795 	"\t\t\t  option name\n"
4796 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4797 #ifdef CONFIG_DYNAMIC_FTRACE
4798 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4799 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4800 	"\t\t\t  functions\n"
4801 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4802 	"\t     modules: Can select a group via module\n"
4803 	"\t      Format: :mod:<module-name>\n"
4804 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4805 	"\t    triggers: a command to perform when function is hit\n"
4806 	"\t      Format: <function>:<trigger>[:count]\n"
4807 	"\t     trigger: traceon, traceoff\n"
4808 	"\t\t      enable_event:<system>:<event>\n"
4809 	"\t\t      disable_event:<system>:<event>\n"
4810 #ifdef CONFIG_STACKTRACE
4811 	"\t\t      stacktrace\n"
4812 #endif
4813 #ifdef CONFIG_TRACER_SNAPSHOT
4814 	"\t\t      snapshot\n"
4815 #endif
4816 	"\t\t      dump\n"
4817 	"\t\t      cpudump\n"
4818 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4819 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4820 	"\t     The first one will disable tracing every time do_fault is hit\n"
4821 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4822 	"\t       The first time do trap is hit and it disables tracing, the\n"
4823 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4824 	"\t       the counter will not decrement. It only decrements when the\n"
4825 	"\t       trigger did work\n"
4826 	"\t     To remove trigger without count:\n"
4827 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4828 	"\t     To remove trigger with a count:\n"
4829 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4830 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4831 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4832 	"\t    modules: Can select a group via module command :mod:\n"
4833 	"\t    Does not accept triggers\n"
4834 #endif /* CONFIG_DYNAMIC_FTRACE */
4835 #ifdef CONFIG_FUNCTION_TRACER
4836 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4837 	"\t\t    (function)\n"
4838 #endif
4839 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4840 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4841 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4842 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4843 #endif
4844 #ifdef CONFIG_TRACER_SNAPSHOT
4845 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4846 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4847 	"\t\t\t  information\n"
4848 #endif
4849 #ifdef CONFIG_STACK_TRACER
4850 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4851 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4852 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4853 	"\t\t\t  new trace)\n"
4854 #ifdef CONFIG_DYNAMIC_FTRACE
4855 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4856 	"\t\t\t  traces\n"
4857 #endif
4858 #endif /* CONFIG_STACK_TRACER */
4859 #ifdef CONFIG_DYNAMIC_EVENTS
4860 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4861 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4862 #endif
4863 #ifdef CONFIG_KPROBE_EVENTS
4864 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4865 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4866 #endif
4867 #ifdef CONFIG_UPROBE_EVENTS
4868 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4869 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4870 #endif
4871 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4872 	"\t  accepts: event-definitions (one definition per line)\n"
4873 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4874 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4875 #ifdef CONFIG_HIST_TRIGGERS
4876 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
4877 #endif
4878 	"\t           -:[<group>/]<event>\n"
4879 #ifdef CONFIG_KPROBE_EVENTS
4880 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4881   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4882 #endif
4883 #ifdef CONFIG_UPROBE_EVENTS
4884   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4885 #endif
4886 	"\t     args: <name>=fetcharg[:type]\n"
4887 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4888 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4889 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4890 #else
4891 	"\t           $stack<index>, $stack, $retval, $comm,\n"
4892 #endif
4893 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
4894 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4895 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4896 	"\t           <type>\\[<array-size>\\]\n"
4897 #ifdef CONFIG_HIST_TRIGGERS
4898 	"\t    field: <stype> <name>;\n"
4899 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4900 	"\t           [unsigned] char/int/long\n"
4901 #endif
4902 #endif
4903 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4904 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4905 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4906 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4907 	"\t\t\t  events\n"
4908 	"      filter\t\t- If set, only events passing filter are traced\n"
4909 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4910 	"\t\t\t  <event>:\n"
4911 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4912 	"      filter\t\t- If set, only events passing filter are traced\n"
4913 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4914 	"\t    Format: <trigger>[:count][if <filter>]\n"
4915 	"\t   trigger: traceon, traceoff\n"
4916 	"\t            enable_event:<system>:<event>\n"
4917 	"\t            disable_event:<system>:<event>\n"
4918 #ifdef CONFIG_HIST_TRIGGERS
4919 	"\t            enable_hist:<system>:<event>\n"
4920 	"\t            disable_hist:<system>:<event>\n"
4921 #endif
4922 #ifdef CONFIG_STACKTRACE
4923 	"\t\t    stacktrace\n"
4924 #endif
4925 #ifdef CONFIG_TRACER_SNAPSHOT
4926 	"\t\t    snapshot\n"
4927 #endif
4928 #ifdef CONFIG_HIST_TRIGGERS
4929 	"\t\t    hist (see below)\n"
4930 #endif
4931 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4932 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4933 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4934 	"\t                  events/block/block_unplug/trigger\n"
4935 	"\t   The first disables tracing every time block_unplug is hit.\n"
4936 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4937 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4938 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4939 	"\t   Like function triggers, the counter is only decremented if it\n"
4940 	"\t    enabled or disabled tracing.\n"
4941 	"\t   To remove a trigger without a count:\n"
4942 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
4943 	"\t   To remove a trigger with a count:\n"
4944 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4945 	"\t   Filters can be ignored when removing a trigger.\n"
4946 #ifdef CONFIG_HIST_TRIGGERS
4947 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4948 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4949 	"\t            [:values=<field1[,field2,...]>]\n"
4950 	"\t            [:sort=<field1[,field2,...]>]\n"
4951 	"\t            [:size=#entries]\n"
4952 	"\t            [:pause][:continue][:clear]\n"
4953 	"\t            [:name=histname1]\n"
4954 	"\t            [:<handler>.<action>]\n"
4955 	"\t            [if <filter>]\n\n"
4956 	"\t    When a matching event is hit, an entry is added to a hash\n"
4957 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4958 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4959 	"\t    correspond to fields in the event's format description.  Keys\n"
4960 	"\t    can be any field, or the special string 'stacktrace'.\n"
4961 	"\t    Compound keys consisting of up to two fields can be specified\n"
4962 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4963 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4964 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4965 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4966 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4967 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4968 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4969 	"\t    its histogram data will be shared with other triggers of the\n"
4970 	"\t    same name, and trigger hits will update this common data.\n\n"
4971 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4972 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4973 	"\t    triggers attached to an event, there will be a table for each\n"
4974 	"\t    trigger in the output.  The table displayed for a named\n"
4975 	"\t    trigger will be the same as any other instance having the\n"
4976 	"\t    same name.  The default format used to display a given field\n"
4977 	"\t    can be modified by appending any of the following modifiers\n"
4978 	"\t    to the field name, as applicable:\n\n"
4979 	"\t            .hex        display a number as a hex value\n"
4980 	"\t            .sym        display an address as a symbol\n"
4981 	"\t            .sym-offset display an address as a symbol and offset\n"
4982 	"\t            .execname   display a common_pid as a program name\n"
4983 	"\t            .syscall    display a syscall id as a syscall name\n"
4984 	"\t            .log2       display log2 value rather than raw number\n"
4985 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
4986 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4987 	"\t    trigger or to start a hist trigger but not log any events\n"
4988 	"\t    until told to do so.  'continue' can be used to start or\n"
4989 	"\t    restart a paused hist trigger.\n\n"
4990 	"\t    The 'clear' parameter will clear the contents of a running\n"
4991 	"\t    hist trigger and leave its current paused/active state\n"
4992 	"\t    unchanged.\n\n"
4993 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4994 	"\t    have one event conditionally start and stop another event's\n"
4995 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4996 	"\t    the enable_event and disable_event triggers.\n\n"
4997 	"\t    Hist trigger handlers and actions are executed whenever a\n"
4998 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
4999 	"\t        <handler>.<action>\n\n"
5000 	"\t    The available handlers are:\n\n"
5001 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5002 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5003 	"\t        onchange(var)            - invoke action if var changes\n\n"
5004 	"\t    The available actions are:\n\n"
5005 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5006 	"\t        save(field,...)                      - save current event fields\n"
5007 #ifdef CONFIG_TRACER_SNAPSHOT
5008 	"\t        snapshot()                           - snapshot the trace buffer\n"
5009 #endif
5010 #endif
5011 ;
5012 
5013 static ssize_t
tracing_readme_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5014 tracing_readme_read(struct file *filp, char __user *ubuf,
5015 		       size_t cnt, loff_t *ppos)
5016 {
5017 	return simple_read_from_buffer(ubuf, cnt, ppos,
5018 					readme_msg, strlen(readme_msg));
5019 }
5020 
5021 static const struct file_operations tracing_readme_fops = {
5022 	.open		= tracing_open_generic,
5023 	.read		= tracing_readme_read,
5024 	.llseek		= generic_file_llseek,
5025 };
5026 
saved_tgids_next(struct seq_file * m,void * v,loff_t * pos)5027 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5028 {
5029 	int *ptr = v;
5030 
5031 	if (*pos || m->count)
5032 		ptr++;
5033 
5034 	(*pos)++;
5035 
5036 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5037 		if (trace_find_tgid(*ptr))
5038 			return ptr;
5039 	}
5040 
5041 	return NULL;
5042 }
5043 
saved_tgids_start(struct seq_file * m,loff_t * pos)5044 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5045 {
5046 	void *v;
5047 	loff_t l = 0;
5048 
5049 	if (!tgid_map)
5050 		return NULL;
5051 
5052 	v = &tgid_map[0];
5053 	while (l <= *pos) {
5054 		v = saved_tgids_next(m, v, &l);
5055 		if (!v)
5056 			return NULL;
5057 	}
5058 
5059 	return v;
5060 }
5061 
saved_tgids_stop(struct seq_file * m,void * v)5062 static void saved_tgids_stop(struct seq_file *m, void *v)
5063 {
5064 }
5065 
saved_tgids_show(struct seq_file * m,void * v)5066 static int saved_tgids_show(struct seq_file *m, void *v)
5067 {
5068 	int pid = (int *)v - tgid_map;
5069 
5070 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5071 	return 0;
5072 }
5073 
5074 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5075 	.start		= saved_tgids_start,
5076 	.stop		= saved_tgids_stop,
5077 	.next		= saved_tgids_next,
5078 	.show		= saved_tgids_show,
5079 };
5080 
tracing_saved_tgids_open(struct inode * inode,struct file * filp)5081 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5082 {
5083 	int ret;
5084 
5085 	ret = tracing_check_open_get_tr(NULL);
5086 	if (ret)
5087 		return ret;
5088 
5089 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5090 }
5091 
5092 
5093 static const struct file_operations tracing_saved_tgids_fops = {
5094 	.open		= tracing_saved_tgids_open,
5095 	.read		= seq_read,
5096 	.llseek		= seq_lseek,
5097 	.release	= seq_release,
5098 };
5099 
saved_cmdlines_next(struct seq_file * m,void * v,loff_t * pos)5100 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5101 {
5102 	unsigned int *ptr = v;
5103 
5104 	if (*pos || m->count)
5105 		ptr++;
5106 
5107 	(*pos)++;
5108 
5109 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5110 	     ptr++) {
5111 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5112 			continue;
5113 
5114 		return ptr;
5115 	}
5116 
5117 	return NULL;
5118 }
5119 
saved_cmdlines_start(struct seq_file * m,loff_t * pos)5120 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5121 {
5122 	void *v;
5123 	loff_t l = 0;
5124 
5125 	preempt_disable();
5126 	arch_spin_lock(&trace_cmdline_lock);
5127 
5128 	v = &savedcmd->map_cmdline_to_pid[0];
5129 	while (l <= *pos) {
5130 		v = saved_cmdlines_next(m, v, &l);
5131 		if (!v)
5132 			return NULL;
5133 	}
5134 
5135 	return v;
5136 }
5137 
saved_cmdlines_stop(struct seq_file * m,void * v)5138 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5139 {
5140 	arch_spin_unlock(&trace_cmdline_lock);
5141 	preempt_enable();
5142 }
5143 
saved_cmdlines_show(struct seq_file * m,void * v)5144 static int saved_cmdlines_show(struct seq_file *m, void *v)
5145 {
5146 	char buf[TASK_COMM_LEN];
5147 	unsigned int *pid = v;
5148 
5149 	__trace_find_cmdline(*pid, buf);
5150 	seq_printf(m, "%d %s\n", *pid, buf);
5151 	return 0;
5152 }
5153 
5154 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5155 	.start		= saved_cmdlines_start,
5156 	.next		= saved_cmdlines_next,
5157 	.stop		= saved_cmdlines_stop,
5158 	.show		= saved_cmdlines_show,
5159 };
5160 
tracing_saved_cmdlines_open(struct inode * inode,struct file * filp)5161 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5162 {
5163 	int ret;
5164 
5165 	ret = tracing_check_open_get_tr(NULL);
5166 	if (ret)
5167 		return ret;
5168 
5169 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5170 }
5171 
5172 static const struct file_operations tracing_saved_cmdlines_fops = {
5173 	.open		= tracing_saved_cmdlines_open,
5174 	.read		= seq_read,
5175 	.llseek		= seq_lseek,
5176 	.release	= seq_release,
5177 };
5178 
5179 static ssize_t
tracing_saved_cmdlines_size_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5180 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5181 				 size_t cnt, loff_t *ppos)
5182 {
5183 	char buf[64];
5184 	int r;
5185 
5186 	arch_spin_lock(&trace_cmdline_lock);
5187 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5188 	arch_spin_unlock(&trace_cmdline_lock);
5189 
5190 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5191 }
5192 
free_saved_cmdlines_buffer(struct saved_cmdlines_buffer * s)5193 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5194 {
5195 	kfree(s->saved_cmdlines);
5196 	kfree(s->map_cmdline_to_pid);
5197 	kfree(s);
5198 }
5199 
tracing_resize_saved_cmdlines(unsigned int val)5200 static int tracing_resize_saved_cmdlines(unsigned int val)
5201 {
5202 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5203 
5204 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5205 	if (!s)
5206 		return -ENOMEM;
5207 
5208 	if (allocate_cmdlines_buffer(val, s) < 0) {
5209 		kfree(s);
5210 		return -ENOMEM;
5211 	}
5212 
5213 	arch_spin_lock(&trace_cmdline_lock);
5214 	savedcmd_temp = savedcmd;
5215 	savedcmd = s;
5216 	arch_spin_unlock(&trace_cmdline_lock);
5217 	free_saved_cmdlines_buffer(savedcmd_temp);
5218 
5219 	return 0;
5220 }
5221 
5222 static ssize_t
tracing_saved_cmdlines_size_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5223 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5224 				  size_t cnt, loff_t *ppos)
5225 {
5226 	unsigned long val;
5227 	int ret;
5228 
5229 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5230 	if (ret)
5231 		return ret;
5232 
5233 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
5234 	if (!val || val > PID_MAX_DEFAULT)
5235 		return -EINVAL;
5236 
5237 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5238 	if (ret < 0)
5239 		return ret;
5240 
5241 	*ppos += cnt;
5242 
5243 	return cnt;
5244 }
5245 
5246 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5247 	.open		= tracing_open_generic,
5248 	.read		= tracing_saved_cmdlines_size_read,
5249 	.write		= tracing_saved_cmdlines_size_write,
5250 };
5251 
5252 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5253 static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item * ptr)5254 update_eval_map(union trace_eval_map_item *ptr)
5255 {
5256 	if (!ptr->map.eval_string) {
5257 		if (ptr->tail.next) {
5258 			ptr = ptr->tail.next;
5259 			/* Set ptr to the next real item (skip head) */
5260 			ptr++;
5261 		} else
5262 			return NULL;
5263 	}
5264 	return ptr;
5265 }
5266 
eval_map_next(struct seq_file * m,void * v,loff_t * pos)5267 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5268 {
5269 	union trace_eval_map_item *ptr = v;
5270 
5271 	/*
5272 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5273 	 * This really should never happen.
5274 	 */
5275 	ptr = update_eval_map(ptr);
5276 	if (WARN_ON_ONCE(!ptr))
5277 		return NULL;
5278 
5279 	ptr++;
5280 
5281 	(*pos)++;
5282 
5283 	ptr = update_eval_map(ptr);
5284 
5285 	return ptr;
5286 }
5287 
eval_map_start(struct seq_file * m,loff_t * pos)5288 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5289 {
5290 	union trace_eval_map_item *v;
5291 	loff_t l = 0;
5292 
5293 	mutex_lock(&trace_eval_mutex);
5294 
5295 	v = trace_eval_maps;
5296 	if (v)
5297 		v++;
5298 
5299 	while (v && l < *pos) {
5300 		v = eval_map_next(m, v, &l);
5301 	}
5302 
5303 	return v;
5304 }
5305 
eval_map_stop(struct seq_file * m,void * v)5306 static void eval_map_stop(struct seq_file *m, void *v)
5307 {
5308 	mutex_unlock(&trace_eval_mutex);
5309 }
5310 
eval_map_show(struct seq_file * m,void * v)5311 static int eval_map_show(struct seq_file *m, void *v)
5312 {
5313 	union trace_eval_map_item *ptr = v;
5314 
5315 	seq_printf(m, "%s %ld (%s)\n",
5316 		   ptr->map.eval_string, ptr->map.eval_value,
5317 		   ptr->map.system);
5318 
5319 	return 0;
5320 }
5321 
5322 static const struct seq_operations tracing_eval_map_seq_ops = {
5323 	.start		= eval_map_start,
5324 	.next		= eval_map_next,
5325 	.stop		= eval_map_stop,
5326 	.show		= eval_map_show,
5327 };
5328 
tracing_eval_map_open(struct inode * inode,struct file * filp)5329 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5330 {
5331 	int ret;
5332 
5333 	ret = tracing_check_open_get_tr(NULL);
5334 	if (ret)
5335 		return ret;
5336 
5337 	return seq_open(filp, &tracing_eval_map_seq_ops);
5338 }
5339 
5340 static const struct file_operations tracing_eval_map_fops = {
5341 	.open		= tracing_eval_map_open,
5342 	.read		= seq_read,
5343 	.llseek		= seq_lseek,
5344 	.release	= seq_release,
5345 };
5346 
5347 static inline union trace_eval_map_item *
trace_eval_jmp_to_tail(union trace_eval_map_item * ptr)5348 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5349 {
5350 	/* Return tail of array given the head */
5351 	return ptr + ptr->head.length + 1;
5352 }
5353 
5354 static void
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)5355 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5356 			   int len)
5357 {
5358 	struct trace_eval_map **stop;
5359 	struct trace_eval_map **map;
5360 	union trace_eval_map_item *map_array;
5361 	union trace_eval_map_item *ptr;
5362 
5363 	stop = start + len;
5364 
5365 	/*
5366 	 * The trace_eval_maps contains the map plus a head and tail item,
5367 	 * where the head holds the module and length of array, and the
5368 	 * tail holds a pointer to the next list.
5369 	 */
5370 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5371 	if (!map_array) {
5372 		pr_warn("Unable to allocate trace eval mapping\n");
5373 		return;
5374 	}
5375 
5376 	mutex_lock(&trace_eval_mutex);
5377 
5378 	if (!trace_eval_maps)
5379 		trace_eval_maps = map_array;
5380 	else {
5381 		ptr = trace_eval_maps;
5382 		for (;;) {
5383 			ptr = trace_eval_jmp_to_tail(ptr);
5384 			if (!ptr->tail.next)
5385 				break;
5386 			ptr = ptr->tail.next;
5387 
5388 		}
5389 		ptr->tail.next = map_array;
5390 	}
5391 	map_array->head.mod = mod;
5392 	map_array->head.length = len;
5393 	map_array++;
5394 
5395 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5396 		map_array->map = **map;
5397 		map_array++;
5398 	}
5399 	memset(map_array, 0, sizeof(*map_array));
5400 
5401 	mutex_unlock(&trace_eval_mutex);
5402 }
5403 
trace_create_eval_file(struct dentry * d_tracer)5404 static void trace_create_eval_file(struct dentry *d_tracer)
5405 {
5406 	trace_create_file("eval_map", 0444, d_tracer,
5407 			  NULL, &tracing_eval_map_fops);
5408 }
5409 
5410 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
trace_create_eval_file(struct dentry * d_tracer)5411 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)5412 static inline void trace_insert_eval_map_file(struct module *mod,
5413 			      struct trace_eval_map **start, int len) { }
5414 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5415 
trace_insert_eval_map(struct module * mod,struct trace_eval_map ** start,int len)5416 static void trace_insert_eval_map(struct module *mod,
5417 				  struct trace_eval_map **start, int len)
5418 {
5419 	struct trace_eval_map **map;
5420 
5421 	if (len <= 0)
5422 		return;
5423 
5424 	map = start;
5425 
5426 	trace_event_eval_update(map, len);
5427 
5428 	trace_insert_eval_map_file(mod, start, len);
5429 }
5430 
5431 static ssize_t
tracing_set_trace_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5432 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5433 		       size_t cnt, loff_t *ppos)
5434 {
5435 	struct trace_array *tr = filp->private_data;
5436 	char buf[MAX_TRACER_SIZE+2];
5437 	int r;
5438 
5439 	mutex_lock(&trace_types_lock);
5440 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5441 	mutex_unlock(&trace_types_lock);
5442 
5443 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5444 }
5445 
tracer_init(struct tracer * t,struct trace_array * tr)5446 int tracer_init(struct tracer *t, struct trace_array *tr)
5447 {
5448 	tracing_reset_online_cpus(&tr->trace_buffer);
5449 	return t->init(tr);
5450 }
5451 
set_buffer_entries(struct trace_buffer * buf,unsigned long val)5452 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5453 {
5454 	int cpu;
5455 
5456 	for_each_tracing_cpu(cpu)
5457 		per_cpu_ptr(buf->data, cpu)->entries = val;
5458 }
5459 
5460 #ifdef CONFIG_TRACER_MAX_TRACE
5461 /* resize @tr's buffer to the size of @size_tr's entries */
resize_buffer_duplicate_size(struct trace_buffer * trace_buf,struct trace_buffer * size_buf,int cpu_id)5462 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5463 					struct trace_buffer *size_buf, int cpu_id)
5464 {
5465 	int cpu, ret = 0;
5466 
5467 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5468 		for_each_tracing_cpu(cpu) {
5469 			ret = ring_buffer_resize(trace_buf->buffer,
5470 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5471 			if (ret < 0)
5472 				break;
5473 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5474 				per_cpu_ptr(size_buf->data, cpu)->entries;
5475 		}
5476 	} else {
5477 		ret = ring_buffer_resize(trace_buf->buffer,
5478 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5479 		if (ret == 0)
5480 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5481 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5482 	}
5483 
5484 	return ret;
5485 }
5486 #endif /* CONFIG_TRACER_MAX_TRACE */
5487 
__tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu)5488 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5489 					unsigned long size, int cpu)
5490 {
5491 	int ret;
5492 
5493 	/*
5494 	 * If kernel or user changes the size of the ring buffer
5495 	 * we use the size that was given, and we can forget about
5496 	 * expanding it later.
5497 	 */
5498 	ring_buffer_expanded = true;
5499 
5500 	/* May be called before buffers are initialized */
5501 	if (!tr->trace_buffer.buffer)
5502 		return 0;
5503 
5504 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5505 	if (ret < 0)
5506 		return ret;
5507 
5508 #ifdef CONFIG_TRACER_MAX_TRACE
5509 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5510 	    !tr->current_trace->use_max_tr)
5511 		goto out;
5512 
5513 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5514 	if (ret < 0) {
5515 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5516 						     &tr->trace_buffer, cpu);
5517 		if (r < 0) {
5518 			/*
5519 			 * AARGH! We are left with different
5520 			 * size max buffer!!!!
5521 			 * The max buffer is our "snapshot" buffer.
5522 			 * When a tracer needs a snapshot (one of the
5523 			 * latency tracers), it swaps the max buffer
5524 			 * with the saved snap shot. We succeeded to
5525 			 * update the size of the main buffer, but failed to
5526 			 * update the size of the max buffer. But when we tried
5527 			 * to reset the main buffer to the original size, we
5528 			 * failed there too. This is very unlikely to
5529 			 * happen, but if it does, warn and kill all
5530 			 * tracing.
5531 			 */
5532 			WARN_ON(1);
5533 			tracing_disabled = 1;
5534 		}
5535 		return ret;
5536 	}
5537 
5538 	if (cpu == RING_BUFFER_ALL_CPUS)
5539 		set_buffer_entries(&tr->max_buffer, size);
5540 	else
5541 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5542 
5543  out:
5544 #endif /* CONFIG_TRACER_MAX_TRACE */
5545 
5546 	if (cpu == RING_BUFFER_ALL_CPUS)
5547 		set_buffer_entries(&tr->trace_buffer, size);
5548 	else
5549 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5550 
5551 	return ret;
5552 }
5553 
tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu_id)5554 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5555 					  unsigned long size, int cpu_id)
5556 {
5557 	int ret = size;
5558 
5559 	mutex_lock(&trace_types_lock);
5560 
5561 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5562 		/* make sure, this cpu is enabled in the mask */
5563 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5564 			ret = -EINVAL;
5565 			goto out;
5566 		}
5567 	}
5568 
5569 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5570 	if (ret < 0)
5571 		ret = -ENOMEM;
5572 
5573 out:
5574 	mutex_unlock(&trace_types_lock);
5575 
5576 	return ret;
5577 }
5578 
5579 
5580 /**
5581  * tracing_update_buffers - used by tracing facility to expand ring buffers
5582  *
5583  * To save on memory when the tracing is never used on a system with it
5584  * configured in. The ring buffers are set to a minimum size. But once
5585  * a user starts to use the tracing facility, then they need to grow
5586  * to their default size.
5587  *
5588  * This function is to be called when a tracer is about to be used.
5589  */
tracing_update_buffers(void)5590 int tracing_update_buffers(void)
5591 {
5592 	int ret = 0;
5593 
5594 	mutex_lock(&trace_types_lock);
5595 	if (!ring_buffer_expanded)
5596 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5597 						RING_BUFFER_ALL_CPUS);
5598 	mutex_unlock(&trace_types_lock);
5599 
5600 	return ret;
5601 }
5602 
5603 struct trace_option_dentry;
5604 
5605 static void
5606 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5607 
5608 /*
5609  * Used to clear out the tracer before deletion of an instance.
5610  * Must have trace_types_lock held.
5611  */
tracing_set_nop(struct trace_array * tr)5612 static void tracing_set_nop(struct trace_array *tr)
5613 {
5614 	if (tr->current_trace == &nop_trace)
5615 		return;
5616 
5617 	tr->current_trace->enabled--;
5618 
5619 	if (tr->current_trace->reset)
5620 		tr->current_trace->reset(tr);
5621 
5622 	tr->current_trace = &nop_trace;
5623 }
5624 
add_tracer_options(struct trace_array * tr,struct tracer * t)5625 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5626 {
5627 	/* Only enable if the directory has been created already. */
5628 	if (!tr->dir)
5629 		return;
5630 
5631 	create_trace_option_files(tr, t);
5632 }
5633 
tracing_set_tracer(struct trace_array * tr,const char * buf)5634 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5635 {
5636 	struct tracer *t;
5637 #ifdef CONFIG_TRACER_MAX_TRACE
5638 	bool had_max_tr;
5639 #endif
5640 	int ret = 0;
5641 
5642 	mutex_lock(&trace_types_lock);
5643 
5644 	if (!ring_buffer_expanded) {
5645 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5646 						RING_BUFFER_ALL_CPUS);
5647 		if (ret < 0)
5648 			goto out;
5649 		ret = 0;
5650 	}
5651 
5652 	for (t = trace_types; t; t = t->next) {
5653 		if (strcmp(t->name, buf) == 0)
5654 			break;
5655 	}
5656 	if (!t) {
5657 		ret = -EINVAL;
5658 		goto out;
5659 	}
5660 	if (t == tr->current_trace)
5661 		goto out;
5662 
5663 #ifdef CONFIG_TRACER_SNAPSHOT
5664 	if (t->use_max_tr) {
5665 		arch_spin_lock(&tr->max_lock);
5666 		if (tr->cond_snapshot)
5667 			ret = -EBUSY;
5668 		arch_spin_unlock(&tr->max_lock);
5669 		if (ret)
5670 			goto out;
5671 	}
5672 #endif
5673 	/* Some tracers won't work on kernel command line */
5674 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5675 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5676 			t->name);
5677 		goto out;
5678 	}
5679 
5680 	/* Some tracers are only allowed for the top level buffer */
5681 	if (!trace_ok_for_array(t, tr)) {
5682 		ret = -EINVAL;
5683 		goto out;
5684 	}
5685 
5686 	/* If trace pipe files are being read, we can't change the tracer */
5687 	if (tr->current_trace->ref) {
5688 		ret = -EBUSY;
5689 		goto out;
5690 	}
5691 
5692 	trace_branch_disable();
5693 
5694 	tr->current_trace->enabled--;
5695 
5696 	if (tr->current_trace->reset)
5697 		tr->current_trace->reset(tr);
5698 
5699 	/* Current trace needs to be nop_trace before synchronize_rcu */
5700 	tr->current_trace = &nop_trace;
5701 
5702 #ifdef CONFIG_TRACER_MAX_TRACE
5703 	had_max_tr = tr->allocated_snapshot;
5704 
5705 	if (had_max_tr && !t->use_max_tr) {
5706 		/*
5707 		 * We need to make sure that the update_max_tr sees that
5708 		 * current_trace changed to nop_trace to keep it from
5709 		 * swapping the buffers after we resize it.
5710 		 * The update_max_tr is called from interrupts disabled
5711 		 * so a synchronized_sched() is sufficient.
5712 		 */
5713 		synchronize_rcu();
5714 		free_snapshot(tr);
5715 	}
5716 #endif
5717 
5718 #ifdef CONFIG_TRACER_MAX_TRACE
5719 	if (t->use_max_tr && !had_max_tr) {
5720 		ret = tracing_alloc_snapshot_instance(tr);
5721 		if (ret < 0)
5722 			goto out;
5723 	}
5724 #endif
5725 
5726 	if (t->init) {
5727 		ret = tracer_init(t, tr);
5728 		if (ret)
5729 			goto out;
5730 	}
5731 
5732 	tr->current_trace = t;
5733 	tr->current_trace->enabled++;
5734 	trace_branch_enable(tr);
5735  out:
5736 	mutex_unlock(&trace_types_lock);
5737 
5738 	return ret;
5739 }
5740 
5741 static ssize_t
tracing_set_trace_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5742 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5743 			size_t cnt, loff_t *ppos)
5744 {
5745 	struct trace_array *tr = filp->private_data;
5746 	char buf[MAX_TRACER_SIZE+1];
5747 	int i;
5748 	size_t ret;
5749 	int err;
5750 
5751 	ret = cnt;
5752 
5753 	if (cnt > MAX_TRACER_SIZE)
5754 		cnt = MAX_TRACER_SIZE;
5755 
5756 	if (copy_from_user(buf, ubuf, cnt))
5757 		return -EFAULT;
5758 
5759 	buf[cnt] = 0;
5760 
5761 	/* strip ending whitespace. */
5762 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5763 		buf[i] = 0;
5764 
5765 	err = tracing_set_tracer(tr, buf);
5766 	if (err)
5767 		return err;
5768 
5769 	*ppos += ret;
5770 
5771 	return ret;
5772 }
5773 
5774 static ssize_t
tracing_nsecs_read(unsigned long * ptr,char __user * ubuf,size_t cnt,loff_t * ppos)5775 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5776 		   size_t cnt, loff_t *ppos)
5777 {
5778 	char buf[64];
5779 	int r;
5780 
5781 	r = snprintf(buf, sizeof(buf), "%ld\n",
5782 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5783 	if (r > sizeof(buf))
5784 		r = sizeof(buf);
5785 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5786 }
5787 
5788 static ssize_t
tracing_nsecs_write(unsigned long * ptr,const char __user * ubuf,size_t cnt,loff_t * ppos)5789 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5790 		    size_t cnt, loff_t *ppos)
5791 {
5792 	unsigned long val;
5793 	int ret;
5794 
5795 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5796 	if (ret)
5797 		return ret;
5798 
5799 	*ptr = val * 1000;
5800 
5801 	return cnt;
5802 }
5803 
5804 static ssize_t
tracing_thresh_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5805 tracing_thresh_read(struct file *filp, char __user *ubuf,
5806 		    size_t cnt, loff_t *ppos)
5807 {
5808 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5809 }
5810 
5811 static ssize_t
tracing_thresh_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5812 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5813 		     size_t cnt, loff_t *ppos)
5814 {
5815 	struct trace_array *tr = filp->private_data;
5816 	int ret;
5817 
5818 	mutex_lock(&trace_types_lock);
5819 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5820 	if (ret < 0)
5821 		goto out;
5822 
5823 	if (tr->current_trace->update_thresh) {
5824 		ret = tr->current_trace->update_thresh(tr);
5825 		if (ret < 0)
5826 			goto out;
5827 	}
5828 
5829 	ret = cnt;
5830 out:
5831 	mutex_unlock(&trace_types_lock);
5832 
5833 	return ret;
5834 }
5835 
5836 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5837 
5838 static ssize_t
tracing_max_lat_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5839 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5840 		     size_t cnt, loff_t *ppos)
5841 {
5842 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5843 }
5844 
5845 static ssize_t
tracing_max_lat_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5846 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5847 		      size_t cnt, loff_t *ppos)
5848 {
5849 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5850 }
5851 
5852 #endif
5853 
tracing_open_pipe(struct inode * inode,struct file * filp)5854 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5855 {
5856 	struct trace_array *tr = inode->i_private;
5857 	struct trace_iterator *iter;
5858 	int ret;
5859 
5860 	ret = tracing_check_open_get_tr(tr);
5861 	if (ret)
5862 		return ret;
5863 
5864 	mutex_lock(&trace_types_lock);
5865 
5866 	/* create a buffer to store the information to pass to userspace */
5867 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5868 	if (!iter) {
5869 		ret = -ENOMEM;
5870 		__trace_array_put(tr);
5871 		goto out;
5872 	}
5873 
5874 	trace_seq_init(&iter->seq);
5875 	iter->trace = tr->current_trace;
5876 
5877 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5878 		ret = -ENOMEM;
5879 		goto fail;
5880 	}
5881 
5882 	/* trace pipe does not show start of buffer */
5883 	cpumask_setall(iter->started);
5884 
5885 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5886 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5887 
5888 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5889 	if (trace_clocks[tr->clock_id].in_ns)
5890 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5891 
5892 	iter->tr = tr;
5893 	iter->trace_buffer = &tr->trace_buffer;
5894 	iter->cpu_file = tracing_get_cpu(inode);
5895 	mutex_init(&iter->mutex);
5896 	filp->private_data = iter;
5897 
5898 	if (iter->trace->pipe_open)
5899 		iter->trace->pipe_open(iter);
5900 
5901 	nonseekable_open(inode, filp);
5902 
5903 	tr->current_trace->ref++;
5904 out:
5905 	mutex_unlock(&trace_types_lock);
5906 	return ret;
5907 
5908 fail:
5909 	kfree(iter);
5910 	__trace_array_put(tr);
5911 	mutex_unlock(&trace_types_lock);
5912 	return ret;
5913 }
5914 
tracing_release_pipe(struct inode * inode,struct file * file)5915 static int tracing_release_pipe(struct inode *inode, struct file *file)
5916 {
5917 	struct trace_iterator *iter = file->private_data;
5918 	struct trace_array *tr = inode->i_private;
5919 
5920 	mutex_lock(&trace_types_lock);
5921 
5922 	tr->current_trace->ref--;
5923 
5924 	if (iter->trace->pipe_close)
5925 		iter->trace->pipe_close(iter);
5926 
5927 	mutex_unlock(&trace_types_lock);
5928 
5929 	free_cpumask_var(iter->started);
5930 	mutex_destroy(&iter->mutex);
5931 	kfree(iter);
5932 
5933 	trace_array_put(tr);
5934 
5935 	return 0;
5936 }
5937 
5938 static __poll_t
trace_poll(struct trace_iterator * iter,struct file * filp,poll_table * poll_table)5939 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5940 {
5941 	struct trace_array *tr = iter->tr;
5942 
5943 	/* Iterators are static, they should be filled or empty */
5944 	if (trace_buffer_iter(iter, iter->cpu_file))
5945 		return EPOLLIN | EPOLLRDNORM;
5946 
5947 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5948 		/*
5949 		 * Always select as readable when in blocking mode
5950 		 */
5951 		return EPOLLIN | EPOLLRDNORM;
5952 	else
5953 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5954 					     filp, poll_table);
5955 }
5956 
5957 static __poll_t
tracing_poll_pipe(struct file * filp,poll_table * poll_table)5958 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5959 {
5960 	struct trace_iterator *iter = filp->private_data;
5961 
5962 	return trace_poll(iter, filp, poll_table);
5963 }
5964 
5965 /* Must be called with iter->mutex held. */
tracing_wait_pipe(struct file * filp)5966 static int tracing_wait_pipe(struct file *filp)
5967 {
5968 	struct trace_iterator *iter = filp->private_data;
5969 	int ret;
5970 
5971 	while (trace_empty(iter)) {
5972 
5973 		if ((filp->f_flags & O_NONBLOCK)) {
5974 			return -EAGAIN;
5975 		}
5976 
5977 		/*
5978 		 * We block until we read something and tracing is disabled.
5979 		 * We still block if tracing is disabled, but we have never
5980 		 * read anything. This allows a user to cat this file, and
5981 		 * then enable tracing. But after we have read something,
5982 		 * we give an EOF when tracing is again disabled.
5983 		 *
5984 		 * iter->pos will be 0 if we haven't read anything.
5985 		 */
5986 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5987 			break;
5988 
5989 		mutex_unlock(&iter->mutex);
5990 
5991 		ret = wait_on_pipe(iter, 0);
5992 
5993 		mutex_lock(&iter->mutex);
5994 
5995 		if (ret)
5996 			return ret;
5997 	}
5998 
5999 	return 1;
6000 }
6001 
6002 /*
6003  * Consumer reader.
6004  */
6005 static ssize_t
tracing_read_pipe(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6006 tracing_read_pipe(struct file *filp, char __user *ubuf,
6007 		  size_t cnt, loff_t *ppos)
6008 {
6009 	struct trace_iterator *iter = filp->private_data;
6010 	ssize_t sret;
6011 
6012 	/*
6013 	 * Avoid more than one consumer on a single file descriptor
6014 	 * This is just a matter of traces coherency, the ring buffer itself
6015 	 * is protected.
6016 	 */
6017 	mutex_lock(&iter->mutex);
6018 
6019 	/* return any leftover data */
6020 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6021 	if (sret != -EBUSY)
6022 		goto out;
6023 
6024 	trace_seq_init(&iter->seq);
6025 
6026 	if (iter->trace->read) {
6027 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6028 		if (sret)
6029 			goto out;
6030 	}
6031 
6032 waitagain:
6033 	sret = tracing_wait_pipe(filp);
6034 	if (sret <= 0)
6035 		goto out;
6036 
6037 	/* stop when tracing is finished */
6038 	if (trace_empty(iter)) {
6039 		sret = 0;
6040 		goto out;
6041 	}
6042 
6043 	if (cnt >= PAGE_SIZE)
6044 		cnt = PAGE_SIZE - 1;
6045 
6046 	/* reset all but tr, trace, and overruns */
6047 	memset(&iter->seq, 0,
6048 	       sizeof(struct trace_iterator) -
6049 	       offsetof(struct trace_iterator, seq));
6050 	cpumask_clear(iter->started);
6051 	trace_seq_init(&iter->seq);
6052 	iter->pos = -1;
6053 
6054 	trace_event_read_lock();
6055 	trace_access_lock(iter->cpu_file);
6056 	while (trace_find_next_entry_inc(iter) != NULL) {
6057 		enum print_line_t ret;
6058 		int save_len = iter->seq.seq.len;
6059 
6060 		ret = print_trace_line(iter);
6061 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6062 			/* don't print partial lines */
6063 			iter->seq.seq.len = save_len;
6064 			break;
6065 		}
6066 		if (ret != TRACE_TYPE_NO_CONSUME)
6067 			trace_consume(iter);
6068 
6069 		if (trace_seq_used(&iter->seq) >= cnt)
6070 			break;
6071 
6072 		/*
6073 		 * Setting the full flag means we reached the trace_seq buffer
6074 		 * size and we should leave by partial output condition above.
6075 		 * One of the trace_seq_* functions is not used properly.
6076 		 */
6077 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6078 			  iter->ent->type);
6079 	}
6080 	trace_access_unlock(iter->cpu_file);
6081 	trace_event_read_unlock();
6082 
6083 	/* Now copy what we have to the user */
6084 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6085 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6086 		trace_seq_init(&iter->seq);
6087 
6088 	/*
6089 	 * If there was nothing to send to user, in spite of consuming trace
6090 	 * entries, go back to wait for more entries.
6091 	 */
6092 	if (sret == -EBUSY)
6093 		goto waitagain;
6094 
6095 out:
6096 	mutex_unlock(&iter->mutex);
6097 
6098 	return sret;
6099 }
6100 
tracing_spd_release_pipe(struct splice_pipe_desc * spd,unsigned int idx)6101 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6102 				     unsigned int idx)
6103 {
6104 	__free_page(spd->pages[idx]);
6105 }
6106 
6107 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6108 	.confirm		= generic_pipe_buf_confirm,
6109 	.release		= generic_pipe_buf_release,
6110 	.steal			= generic_pipe_buf_steal,
6111 	.get			= generic_pipe_buf_get,
6112 };
6113 
6114 static size_t
tracing_fill_pipe_page(size_t rem,struct trace_iterator * iter)6115 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6116 {
6117 	size_t count;
6118 	int save_len;
6119 	int ret;
6120 
6121 	/* Seq buffer is page-sized, exactly what we need. */
6122 	for (;;) {
6123 		save_len = iter->seq.seq.len;
6124 		ret = print_trace_line(iter);
6125 
6126 		if (trace_seq_has_overflowed(&iter->seq)) {
6127 			iter->seq.seq.len = save_len;
6128 			break;
6129 		}
6130 
6131 		/*
6132 		 * This should not be hit, because it should only
6133 		 * be set if the iter->seq overflowed. But check it
6134 		 * anyway to be safe.
6135 		 */
6136 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6137 			iter->seq.seq.len = save_len;
6138 			break;
6139 		}
6140 
6141 		count = trace_seq_used(&iter->seq) - save_len;
6142 		if (rem < count) {
6143 			rem = 0;
6144 			iter->seq.seq.len = save_len;
6145 			break;
6146 		}
6147 
6148 		if (ret != TRACE_TYPE_NO_CONSUME)
6149 			trace_consume(iter);
6150 		rem -= count;
6151 		if (!trace_find_next_entry_inc(iter))	{
6152 			rem = 0;
6153 			iter->ent = NULL;
6154 			break;
6155 		}
6156 	}
6157 
6158 	return rem;
6159 }
6160 
tracing_splice_read_pipe(struct file * filp,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)6161 static ssize_t tracing_splice_read_pipe(struct file *filp,
6162 					loff_t *ppos,
6163 					struct pipe_inode_info *pipe,
6164 					size_t len,
6165 					unsigned int flags)
6166 {
6167 	struct page *pages_def[PIPE_DEF_BUFFERS];
6168 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6169 	struct trace_iterator *iter = filp->private_data;
6170 	struct splice_pipe_desc spd = {
6171 		.pages		= pages_def,
6172 		.partial	= partial_def,
6173 		.nr_pages	= 0, /* This gets updated below. */
6174 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6175 		.ops		= &tracing_pipe_buf_ops,
6176 		.spd_release	= tracing_spd_release_pipe,
6177 	};
6178 	ssize_t ret;
6179 	size_t rem;
6180 	unsigned int i;
6181 
6182 	if (splice_grow_spd(pipe, &spd))
6183 		return -ENOMEM;
6184 
6185 	mutex_lock(&iter->mutex);
6186 
6187 	if (iter->trace->splice_read) {
6188 		ret = iter->trace->splice_read(iter, filp,
6189 					       ppos, pipe, len, flags);
6190 		if (ret)
6191 			goto out_err;
6192 	}
6193 
6194 	ret = tracing_wait_pipe(filp);
6195 	if (ret <= 0)
6196 		goto out_err;
6197 
6198 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6199 		ret = -EFAULT;
6200 		goto out_err;
6201 	}
6202 
6203 	trace_event_read_lock();
6204 	trace_access_lock(iter->cpu_file);
6205 
6206 	/* Fill as many pages as possible. */
6207 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6208 		spd.pages[i] = alloc_page(GFP_KERNEL);
6209 		if (!spd.pages[i])
6210 			break;
6211 
6212 		rem = tracing_fill_pipe_page(rem, iter);
6213 
6214 		/* Copy the data into the page, so we can start over. */
6215 		ret = trace_seq_to_buffer(&iter->seq,
6216 					  page_address(spd.pages[i]),
6217 					  trace_seq_used(&iter->seq));
6218 		if (ret < 0) {
6219 			__free_page(spd.pages[i]);
6220 			break;
6221 		}
6222 		spd.partial[i].offset = 0;
6223 		spd.partial[i].len = trace_seq_used(&iter->seq);
6224 
6225 		trace_seq_init(&iter->seq);
6226 	}
6227 
6228 	trace_access_unlock(iter->cpu_file);
6229 	trace_event_read_unlock();
6230 	mutex_unlock(&iter->mutex);
6231 
6232 	spd.nr_pages = i;
6233 
6234 	if (i)
6235 		ret = splice_to_pipe(pipe, &spd);
6236 	else
6237 		ret = 0;
6238 out:
6239 	splice_shrink_spd(&spd);
6240 	return ret;
6241 
6242 out_err:
6243 	mutex_unlock(&iter->mutex);
6244 	goto out;
6245 }
6246 
6247 static ssize_t
tracing_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6248 tracing_entries_read(struct file *filp, char __user *ubuf,
6249 		     size_t cnt, loff_t *ppos)
6250 {
6251 	struct inode *inode = file_inode(filp);
6252 	struct trace_array *tr = inode->i_private;
6253 	int cpu = tracing_get_cpu(inode);
6254 	char buf[64];
6255 	int r = 0;
6256 	ssize_t ret;
6257 
6258 	mutex_lock(&trace_types_lock);
6259 
6260 	if (cpu == RING_BUFFER_ALL_CPUS) {
6261 		int cpu, buf_size_same;
6262 		unsigned long size;
6263 
6264 		size = 0;
6265 		buf_size_same = 1;
6266 		/* check if all cpu sizes are same */
6267 		for_each_tracing_cpu(cpu) {
6268 			/* fill in the size from first enabled cpu */
6269 			if (size == 0)
6270 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6271 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6272 				buf_size_same = 0;
6273 				break;
6274 			}
6275 		}
6276 
6277 		if (buf_size_same) {
6278 			if (!ring_buffer_expanded)
6279 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6280 					    size >> 10,
6281 					    trace_buf_size >> 10);
6282 			else
6283 				r = sprintf(buf, "%lu\n", size >> 10);
6284 		} else
6285 			r = sprintf(buf, "X\n");
6286 	} else
6287 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6288 
6289 	mutex_unlock(&trace_types_lock);
6290 
6291 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6292 	return ret;
6293 }
6294 
6295 static ssize_t
tracing_entries_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6296 tracing_entries_write(struct file *filp, const char __user *ubuf,
6297 		      size_t cnt, loff_t *ppos)
6298 {
6299 	struct inode *inode = file_inode(filp);
6300 	struct trace_array *tr = inode->i_private;
6301 	unsigned long val;
6302 	int ret;
6303 
6304 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6305 	if (ret)
6306 		return ret;
6307 
6308 	/* must have at least 1 entry */
6309 	if (!val)
6310 		return -EINVAL;
6311 
6312 	/* value is in KB */
6313 	val <<= 10;
6314 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6315 	if (ret < 0)
6316 		return ret;
6317 
6318 	*ppos += cnt;
6319 
6320 	return cnt;
6321 }
6322 
6323 static ssize_t
tracing_total_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6324 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6325 				size_t cnt, loff_t *ppos)
6326 {
6327 	struct trace_array *tr = filp->private_data;
6328 	char buf[64];
6329 	int r, cpu;
6330 	unsigned long size = 0, expanded_size = 0;
6331 
6332 	mutex_lock(&trace_types_lock);
6333 	for_each_tracing_cpu(cpu) {
6334 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6335 		if (!ring_buffer_expanded)
6336 			expanded_size += trace_buf_size >> 10;
6337 	}
6338 	if (ring_buffer_expanded)
6339 		r = sprintf(buf, "%lu\n", size);
6340 	else
6341 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6342 	mutex_unlock(&trace_types_lock);
6343 
6344 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6345 }
6346 
6347 static ssize_t
tracing_free_buffer_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6348 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6349 			  size_t cnt, loff_t *ppos)
6350 {
6351 	/*
6352 	 * There is no need to read what the user has written, this function
6353 	 * is just to make sure that there is no error when "echo" is used
6354 	 */
6355 
6356 	*ppos += cnt;
6357 
6358 	return cnt;
6359 }
6360 
6361 static int
tracing_free_buffer_release(struct inode * inode,struct file * filp)6362 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6363 {
6364 	struct trace_array *tr = inode->i_private;
6365 
6366 	/* disable tracing ? */
6367 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6368 		tracer_tracing_off(tr);
6369 	/* resize the ring buffer to 0 */
6370 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6371 
6372 	trace_array_put(tr);
6373 
6374 	return 0;
6375 }
6376 
6377 static ssize_t
tracing_mark_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)6378 tracing_mark_write(struct file *filp, const char __user *ubuf,
6379 					size_t cnt, loff_t *fpos)
6380 {
6381 	struct trace_array *tr = filp->private_data;
6382 	struct ring_buffer_event *event;
6383 	enum event_trigger_type tt = ETT_NONE;
6384 	struct ring_buffer *buffer;
6385 	struct print_entry *entry;
6386 	unsigned long irq_flags;
6387 	ssize_t written;
6388 	int size;
6389 	int len;
6390 
6391 /* Used in tracing_mark_raw_write() as well */
6392 #define FAULTED_STR "<faulted>"
6393 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6394 
6395 	if (tracing_disabled)
6396 		return -EINVAL;
6397 
6398 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6399 		return -EINVAL;
6400 
6401 	if (cnt > TRACE_BUF_SIZE)
6402 		cnt = TRACE_BUF_SIZE;
6403 
6404 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6405 
6406 	local_save_flags(irq_flags);
6407 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6408 
6409 	/* If less than "<faulted>", then make sure we can still add that */
6410 	if (cnt < FAULTED_SIZE)
6411 		size += FAULTED_SIZE - cnt;
6412 
6413 	buffer = tr->trace_buffer.buffer;
6414 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6415 					    irq_flags, preempt_count());
6416 	if (unlikely(!event))
6417 		/* Ring buffer disabled, return as if not open for write */
6418 		return -EBADF;
6419 
6420 	entry = ring_buffer_event_data(event);
6421 	entry->ip = _THIS_IP_;
6422 
6423 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6424 	if (len) {
6425 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6426 		cnt = FAULTED_SIZE;
6427 		written = -EFAULT;
6428 	} else
6429 		written = cnt;
6430 	len = cnt;
6431 
6432 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6433 		/* do not add \n before testing triggers, but add \0 */
6434 		entry->buf[cnt] = '\0';
6435 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6436 	}
6437 
6438 	if (entry->buf[cnt - 1] != '\n') {
6439 		entry->buf[cnt] = '\n';
6440 		entry->buf[cnt + 1] = '\0';
6441 	} else
6442 		entry->buf[cnt] = '\0';
6443 
6444 	__buffer_unlock_commit(buffer, event);
6445 
6446 	if (tt)
6447 		event_triggers_post_call(tr->trace_marker_file, tt);
6448 
6449 	if (written > 0)
6450 		*fpos += written;
6451 
6452 	return written;
6453 }
6454 
6455 /* Limit it for now to 3K (including tag) */
6456 #define RAW_DATA_MAX_SIZE (1024*3)
6457 
6458 static ssize_t
tracing_mark_raw_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)6459 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6460 					size_t cnt, loff_t *fpos)
6461 {
6462 	struct trace_array *tr = filp->private_data;
6463 	struct ring_buffer_event *event;
6464 	struct ring_buffer *buffer;
6465 	struct raw_data_entry *entry;
6466 	unsigned long irq_flags;
6467 	ssize_t written;
6468 	int size;
6469 	int len;
6470 
6471 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6472 
6473 	if (tracing_disabled)
6474 		return -EINVAL;
6475 
6476 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6477 		return -EINVAL;
6478 
6479 	/* The marker must at least have a tag id */
6480 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6481 		return -EINVAL;
6482 
6483 	if (cnt > TRACE_BUF_SIZE)
6484 		cnt = TRACE_BUF_SIZE;
6485 
6486 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6487 
6488 	local_save_flags(irq_flags);
6489 	size = sizeof(*entry) + cnt;
6490 	if (cnt < FAULT_SIZE_ID)
6491 		size += FAULT_SIZE_ID - cnt;
6492 
6493 	buffer = tr->trace_buffer.buffer;
6494 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6495 					    irq_flags, preempt_count());
6496 	if (!event)
6497 		/* Ring buffer disabled, return as if not open for write */
6498 		return -EBADF;
6499 
6500 	entry = ring_buffer_event_data(event);
6501 
6502 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6503 	if (len) {
6504 		entry->id = -1;
6505 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6506 		written = -EFAULT;
6507 	} else
6508 		written = cnt;
6509 
6510 	__buffer_unlock_commit(buffer, event);
6511 
6512 	if (written > 0)
6513 		*fpos += written;
6514 
6515 	return written;
6516 }
6517 
tracing_clock_show(struct seq_file * m,void * v)6518 static int tracing_clock_show(struct seq_file *m, void *v)
6519 {
6520 	struct trace_array *tr = m->private;
6521 	int i;
6522 
6523 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6524 		seq_printf(m,
6525 			"%s%s%s%s", i ? " " : "",
6526 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6527 			i == tr->clock_id ? "]" : "");
6528 	seq_putc(m, '\n');
6529 
6530 	return 0;
6531 }
6532 
tracing_set_clock(struct trace_array * tr,const char * clockstr)6533 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6534 {
6535 	int i;
6536 
6537 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6538 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6539 			break;
6540 	}
6541 	if (i == ARRAY_SIZE(trace_clocks))
6542 		return -EINVAL;
6543 
6544 	mutex_lock(&trace_types_lock);
6545 
6546 	tr->clock_id = i;
6547 
6548 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6549 
6550 	/*
6551 	 * New clock may not be consistent with the previous clock.
6552 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6553 	 */
6554 	tracing_reset_online_cpus(&tr->trace_buffer);
6555 
6556 #ifdef CONFIG_TRACER_MAX_TRACE
6557 	if (tr->max_buffer.buffer)
6558 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6559 	tracing_reset_online_cpus(&tr->max_buffer);
6560 #endif
6561 
6562 	mutex_unlock(&trace_types_lock);
6563 
6564 	return 0;
6565 }
6566 
tracing_clock_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)6567 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6568 				   size_t cnt, loff_t *fpos)
6569 {
6570 	struct seq_file *m = filp->private_data;
6571 	struct trace_array *tr = m->private;
6572 	char buf[64];
6573 	const char *clockstr;
6574 	int ret;
6575 
6576 	if (cnt >= sizeof(buf))
6577 		return -EINVAL;
6578 
6579 	if (copy_from_user(buf, ubuf, cnt))
6580 		return -EFAULT;
6581 
6582 	buf[cnt] = 0;
6583 
6584 	clockstr = strstrip(buf);
6585 
6586 	ret = tracing_set_clock(tr, clockstr);
6587 	if (ret)
6588 		return ret;
6589 
6590 	*fpos += cnt;
6591 
6592 	return cnt;
6593 }
6594 
tracing_clock_open(struct inode * inode,struct file * file)6595 static int tracing_clock_open(struct inode *inode, struct file *file)
6596 {
6597 	struct trace_array *tr = inode->i_private;
6598 	int ret;
6599 
6600 	ret = tracing_check_open_get_tr(tr);
6601 	if (ret)
6602 		return ret;
6603 
6604 	ret = single_open(file, tracing_clock_show, inode->i_private);
6605 	if (ret < 0)
6606 		trace_array_put(tr);
6607 
6608 	return ret;
6609 }
6610 
tracing_time_stamp_mode_show(struct seq_file * m,void * v)6611 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6612 {
6613 	struct trace_array *tr = m->private;
6614 
6615 	mutex_lock(&trace_types_lock);
6616 
6617 	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6618 		seq_puts(m, "delta [absolute]\n");
6619 	else
6620 		seq_puts(m, "[delta] absolute\n");
6621 
6622 	mutex_unlock(&trace_types_lock);
6623 
6624 	return 0;
6625 }
6626 
tracing_time_stamp_mode_open(struct inode * inode,struct file * file)6627 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6628 {
6629 	struct trace_array *tr = inode->i_private;
6630 	int ret;
6631 
6632 	ret = tracing_check_open_get_tr(tr);
6633 	if (ret)
6634 		return ret;
6635 
6636 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6637 	if (ret < 0)
6638 		trace_array_put(tr);
6639 
6640 	return ret;
6641 }
6642 
tracing_set_time_stamp_abs(struct trace_array * tr,bool abs)6643 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6644 {
6645 	int ret = 0;
6646 
6647 	mutex_lock(&trace_types_lock);
6648 
6649 	if (abs && tr->time_stamp_abs_ref++)
6650 		goto out;
6651 
6652 	if (!abs) {
6653 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6654 			ret = -EINVAL;
6655 			goto out;
6656 		}
6657 
6658 		if (--tr->time_stamp_abs_ref)
6659 			goto out;
6660 	}
6661 
6662 	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6663 
6664 #ifdef CONFIG_TRACER_MAX_TRACE
6665 	if (tr->max_buffer.buffer)
6666 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6667 #endif
6668  out:
6669 	mutex_unlock(&trace_types_lock);
6670 
6671 	return ret;
6672 }
6673 
6674 struct ftrace_buffer_info {
6675 	struct trace_iterator	iter;
6676 	void			*spare;
6677 	unsigned int		spare_cpu;
6678 	unsigned int		read;
6679 };
6680 
6681 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_open(struct inode * inode,struct file * file)6682 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6683 {
6684 	struct trace_array *tr = inode->i_private;
6685 	struct trace_iterator *iter;
6686 	struct seq_file *m;
6687 	int ret;
6688 
6689 	ret = tracing_check_open_get_tr(tr);
6690 	if (ret)
6691 		return ret;
6692 
6693 	if (file->f_mode & FMODE_READ) {
6694 		iter = __tracing_open(inode, file, true);
6695 		if (IS_ERR(iter))
6696 			ret = PTR_ERR(iter);
6697 	} else {
6698 		/* Writes still need the seq_file to hold the private data */
6699 		ret = -ENOMEM;
6700 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6701 		if (!m)
6702 			goto out;
6703 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6704 		if (!iter) {
6705 			kfree(m);
6706 			goto out;
6707 		}
6708 		ret = 0;
6709 
6710 		iter->tr = tr;
6711 		iter->trace_buffer = &tr->max_buffer;
6712 		iter->cpu_file = tracing_get_cpu(inode);
6713 		m->private = iter;
6714 		file->private_data = m;
6715 	}
6716 out:
6717 	if (ret < 0)
6718 		trace_array_put(tr);
6719 
6720 	return ret;
6721 }
6722 
6723 static ssize_t
tracing_snapshot_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6724 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6725 		       loff_t *ppos)
6726 {
6727 	struct seq_file *m = filp->private_data;
6728 	struct trace_iterator *iter = m->private;
6729 	struct trace_array *tr = iter->tr;
6730 	unsigned long val;
6731 	int ret;
6732 
6733 	ret = tracing_update_buffers();
6734 	if (ret < 0)
6735 		return ret;
6736 
6737 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6738 	if (ret)
6739 		return ret;
6740 
6741 	mutex_lock(&trace_types_lock);
6742 
6743 	if (tr->current_trace->use_max_tr) {
6744 		ret = -EBUSY;
6745 		goto out;
6746 	}
6747 
6748 	arch_spin_lock(&tr->max_lock);
6749 	if (tr->cond_snapshot)
6750 		ret = -EBUSY;
6751 	arch_spin_unlock(&tr->max_lock);
6752 	if (ret)
6753 		goto out;
6754 
6755 	switch (val) {
6756 	case 0:
6757 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6758 			ret = -EINVAL;
6759 			break;
6760 		}
6761 		if (tr->allocated_snapshot)
6762 			free_snapshot(tr);
6763 		break;
6764 	case 1:
6765 /* Only allow per-cpu swap if the ring buffer supports it */
6766 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6767 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6768 			ret = -EINVAL;
6769 			break;
6770 		}
6771 #endif
6772 		if (tr->allocated_snapshot)
6773 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
6774 					&tr->trace_buffer, iter->cpu_file);
6775 		else
6776 			ret = tracing_alloc_snapshot_instance(tr);
6777 		if (ret < 0)
6778 			break;
6779 		local_irq_disable();
6780 		/* Now, we're going to swap */
6781 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6782 			update_max_tr(tr, current, smp_processor_id(), NULL);
6783 		else
6784 			update_max_tr_single(tr, current, iter->cpu_file);
6785 		local_irq_enable();
6786 		break;
6787 	default:
6788 		if (tr->allocated_snapshot) {
6789 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6790 				tracing_reset_online_cpus(&tr->max_buffer);
6791 			else
6792 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6793 		}
6794 		break;
6795 	}
6796 
6797 	if (ret >= 0) {
6798 		*ppos += cnt;
6799 		ret = cnt;
6800 	}
6801 out:
6802 	mutex_unlock(&trace_types_lock);
6803 	return ret;
6804 }
6805 
tracing_snapshot_release(struct inode * inode,struct file * file)6806 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6807 {
6808 	struct seq_file *m = file->private_data;
6809 	int ret;
6810 
6811 	ret = tracing_release(inode, file);
6812 
6813 	if (file->f_mode & FMODE_READ)
6814 		return ret;
6815 
6816 	/* If write only, the seq_file is just a stub */
6817 	if (m)
6818 		kfree(m->private);
6819 	kfree(m);
6820 
6821 	return 0;
6822 }
6823 
6824 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6825 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6826 				    size_t count, loff_t *ppos);
6827 static int tracing_buffers_release(struct inode *inode, struct file *file);
6828 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6829 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6830 
snapshot_raw_open(struct inode * inode,struct file * filp)6831 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6832 {
6833 	struct ftrace_buffer_info *info;
6834 	int ret;
6835 
6836 	/* The following checks for tracefs lockdown */
6837 	ret = tracing_buffers_open(inode, filp);
6838 	if (ret < 0)
6839 		return ret;
6840 
6841 	info = filp->private_data;
6842 
6843 	if (info->iter.trace->use_max_tr) {
6844 		tracing_buffers_release(inode, filp);
6845 		return -EBUSY;
6846 	}
6847 
6848 	info->iter.snapshot = true;
6849 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6850 
6851 	return ret;
6852 }
6853 
6854 #endif /* CONFIG_TRACER_SNAPSHOT */
6855 
6856 
6857 static const struct file_operations tracing_thresh_fops = {
6858 	.open		= tracing_open_generic,
6859 	.read		= tracing_thresh_read,
6860 	.write		= tracing_thresh_write,
6861 	.llseek		= generic_file_llseek,
6862 };
6863 
6864 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6865 static const struct file_operations tracing_max_lat_fops = {
6866 	.open		= tracing_open_generic,
6867 	.read		= tracing_max_lat_read,
6868 	.write		= tracing_max_lat_write,
6869 	.llseek		= generic_file_llseek,
6870 };
6871 #endif
6872 
6873 static const struct file_operations set_tracer_fops = {
6874 	.open		= tracing_open_generic,
6875 	.read		= tracing_set_trace_read,
6876 	.write		= tracing_set_trace_write,
6877 	.llseek		= generic_file_llseek,
6878 };
6879 
6880 static const struct file_operations tracing_pipe_fops = {
6881 	.open		= tracing_open_pipe,
6882 	.poll		= tracing_poll_pipe,
6883 	.read		= tracing_read_pipe,
6884 	.splice_read	= tracing_splice_read_pipe,
6885 	.release	= tracing_release_pipe,
6886 	.llseek		= no_llseek,
6887 };
6888 
6889 static const struct file_operations tracing_entries_fops = {
6890 	.open		= tracing_open_generic_tr,
6891 	.read		= tracing_entries_read,
6892 	.write		= tracing_entries_write,
6893 	.llseek		= generic_file_llseek,
6894 	.release	= tracing_release_generic_tr,
6895 };
6896 
6897 static const struct file_operations tracing_total_entries_fops = {
6898 	.open		= tracing_open_generic_tr,
6899 	.read		= tracing_total_entries_read,
6900 	.llseek		= generic_file_llseek,
6901 	.release	= tracing_release_generic_tr,
6902 };
6903 
6904 static const struct file_operations tracing_free_buffer_fops = {
6905 	.open		= tracing_open_generic_tr,
6906 	.write		= tracing_free_buffer_write,
6907 	.release	= tracing_free_buffer_release,
6908 };
6909 
6910 static const struct file_operations tracing_mark_fops = {
6911 	.open		= tracing_open_generic_tr,
6912 	.write		= tracing_mark_write,
6913 	.llseek		= generic_file_llseek,
6914 	.release	= tracing_release_generic_tr,
6915 };
6916 
6917 static const struct file_operations tracing_mark_raw_fops = {
6918 	.open		= tracing_open_generic_tr,
6919 	.write		= tracing_mark_raw_write,
6920 	.llseek		= generic_file_llseek,
6921 	.release	= tracing_release_generic_tr,
6922 };
6923 
6924 static const struct file_operations trace_clock_fops = {
6925 	.open		= tracing_clock_open,
6926 	.read		= seq_read,
6927 	.llseek		= seq_lseek,
6928 	.release	= tracing_single_release_tr,
6929 	.write		= tracing_clock_write,
6930 };
6931 
6932 static const struct file_operations trace_time_stamp_mode_fops = {
6933 	.open		= tracing_time_stamp_mode_open,
6934 	.read		= seq_read,
6935 	.llseek		= seq_lseek,
6936 	.release	= tracing_single_release_tr,
6937 };
6938 
6939 #ifdef CONFIG_TRACER_SNAPSHOT
6940 static const struct file_operations snapshot_fops = {
6941 	.open		= tracing_snapshot_open,
6942 	.read		= seq_read,
6943 	.write		= tracing_snapshot_write,
6944 	.llseek		= tracing_lseek,
6945 	.release	= tracing_snapshot_release,
6946 };
6947 
6948 static const struct file_operations snapshot_raw_fops = {
6949 	.open		= snapshot_raw_open,
6950 	.read		= tracing_buffers_read,
6951 	.release	= tracing_buffers_release,
6952 	.splice_read	= tracing_buffers_splice_read,
6953 	.llseek		= no_llseek,
6954 };
6955 
6956 #endif /* CONFIG_TRACER_SNAPSHOT */
6957 
6958 #define TRACING_LOG_ERRS_MAX	8
6959 #define TRACING_LOG_LOC_MAX	128
6960 
6961 #define CMD_PREFIX "  Command: "
6962 
6963 struct err_info {
6964 	const char	**errs;	/* ptr to loc-specific array of err strings */
6965 	u8		type;	/* index into errs -> specific err string */
6966 	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
6967 	u64		ts;
6968 };
6969 
6970 struct tracing_log_err {
6971 	struct list_head	list;
6972 	struct err_info		info;
6973 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
6974 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
6975 };
6976 
6977 static DEFINE_MUTEX(tracing_err_log_lock);
6978 
get_tracing_log_err(struct trace_array * tr)6979 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
6980 {
6981 	struct tracing_log_err *err;
6982 
6983 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
6984 		err = kzalloc(sizeof(*err), GFP_KERNEL);
6985 		if (!err)
6986 			err = ERR_PTR(-ENOMEM);
6987 		tr->n_err_log_entries++;
6988 
6989 		return err;
6990 	}
6991 
6992 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
6993 	list_del(&err->list);
6994 
6995 	return err;
6996 }
6997 
6998 /**
6999  * err_pos - find the position of a string within a command for error careting
7000  * @cmd: The tracing command that caused the error
7001  * @str: The string to position the caret at within @cmd
7002  *
7003  * Finds the position of the first occurence of @str within @cmd.  The
7004  * return value can be passed to tracing_log_err() for caret placement
7005  * within @cmd.
7006  *
7007  * Returns the index within @cmd of the first occurence of @str or 0
7008  * if @str was not found.
7009  */
err_pos(char * cmd,const char * str)7010 unsigned int err_pos(char *cmd, const char *str)
7011 {
7012 	char *found;
7013 
7014 	if (WARN_ON(!strlen(cmd)))
7015 		return 0;
7016 
7017 	found = strstr(cmd, str);
7018 	if (found)
7019 		return found - cmd;
7020 
7021 	return 0;
7022 }
7023 
7024 /**
7025  * tracing_log_err - write an error to the tracing error log
7026  * @tr: The associated trace array for the error (NULL for top level array)
7027  * @loc: A string describing where the error occurred
7028  * @cmd: The tracing command that caused the error
7029  * @errs: The array of loc-specific static error strings
7030  * @type: The index into errs[], which produces the specific static err string
7031  * @pos: The position the caret should be placed in the cmd
7032  *
7033  * Writes an error into tracing/error_log of the form:
7034  *
7035  * <loc>: error: <text>
7036  *   Command: <cmd>
7037  *              ^
7038  *
7039  * tracing/error_log is a small log file containing the last
7040  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7041  * unless there has been a tracing error, and the error log can be
7042  * cleared and have its memory freed by writing the empty string in
7043  * truncation mode to it i.e. echo > tracing/error_log.
7044  *
7045  * NOTE: the @errs array along with the @type param are used to
7046  * produce a static error string - this string is not copied and saved
7047  * when the error is logged - only a pointer to it is saved.  See
7048  * existing callers for examples of how static strings are typically
7049  * defined for use with tracing_log_err().
7050  */
tracing_log_err(struct trace_array * tr,const char * loc,const char * cmd,const char ** errs,u8 type,u8 pos)7051 void tracing_log_err(struct trace_array *tr,
7052 		     const char *loc, const char *cmd,
7053 		     const char **errs, u8 type, u8 pos)
7054 {
7055 	struct tracing_log_err *err;
7056 
7057 	if (!tr)
7058 		tr = &global_trace;
7059 
7060 	mutex_lock(&tracing_err_log_lock);
7061 	err = get_tracing_log_err(tr);
7062 	if (PTR_ERR(err) == -ENOMEM) {
7063 		mutex_unlock(&tracing_err_log_lock);
7064 		return;
7065 	}
7066 
7067 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7068 	snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7069 
7070 	err->info.errs = errs;
7071 	err->info.type = type;
7072 	err->info.pos = pos;
7073 	err->info.ts = local_clock();
7074 
7075 	list_add_tail(&err->list, &tr->err_log);
7076 	mutex_unlock(&tracing_err_log_lock);
7077 }
7078 
clear_tracing_err_log(struct trace_array * tr)7079 static void clear_tracing_err_log(struct trace_array *tr)
7080 {
7081 	struct tracing_log_err *err, *next;
7082 
7083 	mutex_lock(&tracing_err_log_lock);
7084 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7085 		list_del(&err->list);
7086 		kfree(err);
7087 	}
7088 
7089 	tr->n_err_log_entries = 0;
7090 	mutex_unlock(&tracing_err_log_lock);
7091 }
7092 
tracing_err_log_seq_start(struct seq_file * m,loff_t * pos)7093 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7094 {
7095 	struct trace_array *tr = m->private;
7096 
7097 	mutex_lock(&tracing_err_log_lock);
7098 
7099 	return seq_list_start(&tr->err_log, *pos);
7100 }
7101 
tracing_err_log_seq_next(struct seq_file * m,void * v,loff_t * pos)7102 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7103 {
7104 	struct trace_array *tr = m->private;
7105 
7106 	return seq_list_next(v, &tr->err_log, pos);
7107 }
7108 
tracing_err_log_seq_stop(struct seq_file * m,void * v)7109 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7110 {
7111 	mutex_unlock(&tracing_err_log_lock);
7112 }
7113 
tracing_err_log_show_pos(struct seq_file * m,u8 pos)7114 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7115 {
7116 	u8 i;
7117 
7118 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7119 		seq_putc(m, ' ');
7120 	for (i = 0; i < pos; i++)
7121 		seq_putc(m, ' ');
7122 	seq_puts(m, "^\n");
7123 }
7124 
tracing_err_log_seq_show(struct seq_file * m,void * v)7125 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7126 {
7127 	struct tracing_log_err *err = v;
7128 
7129 	if (err) {
7130 		const char *err_text = err->info.errs[err->info.type];
7131 		u64 sec = err->info.ts;
7132 		u32 nsec;
7133 
7134 		nsec = do_div(sec, NSEC_PER_SEC);
7135 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7136 			   err->loc, err_text);
7137 		seq_printf(m, "%s", err->cmd);
7138 		tracing_err_log_show_pos(m, err->info.pos);
7139 	}
7140 
7141 	return 0;
7142 }
7143 
7144 static const struct seq_operations tracing_err_log_seq_ops = {
7145 	.start  = tracing_err_log_seq_start,
7146 	.next   = tracing_err_log_seq_next,
7147 	.stop   = tracing_err_log_seq_stop,
7148 	.show   = tracing_err_log_seq_show
7149 };
7150 
tracing_err_log_open(struct inode * inode,struct file * file)7151 static int tracing_err_log_open(struct inode *inode, struct file *file)
7152 {
7153 	struct trace_array *tr = inode->i_private;
7154 	int ret = 0;
7155 
7156 	ret = tracing_check_open_get_tr(tr);
7157 	if (ret)
7158 		return ret;
7159 
7160 	/* If this file was opened for write, then erase contents */
7161 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7162 		clear_tracing_err_log(tr);
7163 
7164 	if (file->f_mode & FMODE_READ) {
7165 		ret = seq_open(file, &tracing_err_log_seq_ops);
7166 		if (!ret) {
7167 			struct seq_file *m = file->private_data;
7168 			m->private = tr;
7169 		} else {
7170 			trace_array_put(tr);
7171 		}
7172 	}
7173 	return ret;
7174 }
7175 
tracing_err_log_write(struct file * file,const char __user * buffer,size_t count,loff_t * ppos)7176 static ssize_t tracing_err_log_write(struct file *file,
7177 				     const char __user *buffer,
7178 				     size_t count, loff_t *ppos)
7179 {
7180 	return count;
7181 }
7182 
tracing_err_log_release(struct inode * inode,struct file * file)7183 static int tracing_err_log_release(struct inode *inode, struct file *file)
7184 {
7185 	struct trace_array *tr = inode->i_private;
7186 
7187 	trace_array_put(tr);
7188 
7189 	if (file->f_mode & FMODE_READ)
7190 		seq_release(inode, file);
7191 
7192 	return 0;
7193 }
7194 
7195 static const struct file_operations tracing_err_log_fops = {
7196 	.open           = tracing_err_log_open,
7197 	.write		= tracing_err_log_write,
7198 	.read           = seq_read,
7199 	.llseek         = seq_lseek,
7200 	.release        = tracing_err_log_release,
7201 };
7202 
tracing_buffers_open(struct inode * inode,struct file * filp)7203 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7204 {
7205 	struct trace_array *tr = inode->i_private;
7206 	struct ftrace_buffer_info *info;
7207 	int ret;
7208 
7209 	ret = tracing_check_open_get_tr(tr);
7210 	if (ret)
7211 		return ret;
7212 
7213 	info = kzalloc(sizeof(*info), GFP_KERNEL);
7214 	if (!info) {
7215 		trace_array_put(tr);
7216 		return -ENOMEM;
7217 	}
7218 
7219 	mutex_lock(&trace_types_lock);
7220 
7221 	info->iter.tr		= tr;
7222 	info->iter.cpu_file	= tracing_get_cpu(inode);
7223 	info->iter.trace	= tr->current_trace;
7224 	info->iter.trace_buffer = &tr->trace_buffer;
7225 	info->spare		= NULL;
7226 	/* Force reading ring buffer for first read */
7227 	info->read		= (unsigned int)-1;
7228 
7229 	filp->private_data = info;
7230 
7231 	tr->current_trace->ref++;
7232 
7233 	mutex_unlock(&trace_types_lock);
7234 
7235 	ret = nonseekable_open(inode, filp);
7236 	if (ret < 0)
7237 		trace_array_put(tr);
7238 
7239 	return ret;
7240 }
7241 
7242 static __poll_t
tracing_buffers_poll(struct file * filp,poll_table * poll_table)7243 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7244 {
7245 	struct ftrace_buffer_info *info = filp->private_data;
7246 	struct trace_iterator *iter = &info->iter;
7247 
7248 	return trace_poll(iter, filp, poll_table);
7249 }
7250 
7251 static ssize_t
tracing_buffers_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)7252 tracing_buffers_read(struct file *filp, char __user *ubuf,
7253 		     size_t count, loff_t *ppos)
7254 {
7255 	struct ftrace_buffer_info *info = filp->private_data;
7256 	struct trace_iterator *iter = &info->iter;
7257 	ssize_t ret = 0;
7258 	ssize_t size;
7259 
7260 	if (!count)
7261 		return 0;
7262 
7263 #ifdef CONFIG_TRACER_MAX_TRACE
7264 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7265 		return -EBUSY;
7266 #endif
7267 
7268 	if (!info->spare) {
7269 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
7270 							  iter->cpu_file);
7271 		if (IS_ERR(info->spare)) {
7272 			ret = PTR_ERR(info->spare);
7273 			info->spare = NULL;
7274 		} else {
7275 			info->spare_cpu = iter->cpu_file;
7276 		}
7277 	}
7278 	if (!info->spare)
7279 		return ret;
7280 
7281 	/* Do we have previous read data to read? */
7282 	if (info->read < PAGE_SIZE)
7283 		goto read;
7284 
7285  again:
7286 	trace_access_lock(iter->cpu_file);
7287 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
7288 				    &info->spare,
7289 				    count,
7290 				    iter->cpu_file, 0);
7291 	trace_access_unlock(iter->cpu_file);
7292 
7293 	if (ret < 0) {
7294 		if (trace_empty(iter)) {
7295 			if ((filp->f_flags & O_NONBLOCK))
7296 				return -EAGAIN;
7297 
7298 			ret = wait_on_pipe(iter, 0);
7299 			if (ret)
7300 				return ret;
7301 
7302 			goto again;
7303 		}
7304 		return 0;
7305 	}
7306 
7307 	info->read = 0;
7308  read:
7309 	size = PAGE_SIZE - info->read;
7310 	if (size > count)
7311 		size = count;
7312 
7313 	ret = copy_to_user(ubuf, info->spare + info->read, size);
7314 	if (ret == size)
7315 		return -EFAULT;
7316 
7317 	size -= ret;
7318 
7319 	*ppos += size;
7320 	info->read += size;
7321 
7322 	return size;
7323 }
7324 
tracing_buffers_release(struct inode * inode,struct file * file)7325 static int tracing_buffers_release(struct inode *inode, struct file *file)
7326 {
7327 	struct ftrace_buffer_info *info = file->private_data;
7328 	struct trace_iterator *iter = &info->iter;
7329 
7330 	mutex_lock(&trace_types_lock);
7331 
7332 	iter->tr->current_trace->ref--;
7333 
7334 	__trace_array_put(iter->tr);
7335 
7336 	if (info->spare)
7337 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
7338 					   info->spare_cpu, info->spare);
7339 	kfree(info);
7340 
7341 	mutex_unlock(&trace_types_lock);
7342 
7343 	return 0;
7344 }
7345 
7346 struct buffer_ref {
7347 	struct ring_buffer	*buffer;
7348 	void			*page;
7349 	int			cpu;
7350 	refcount_t		refcount;
7351 };
7352 
buffer_ref_release(struct buffer_ref * ref)7353 static void buffer_ref_release(struct buffer_ref *ref)
7354 {
7355 	if (!refcount_dec_and_test(&ref->refcount))
7356 		return;
7357 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7358 	kfree(ref);
7359 }
7360 
buffer_pipe_buf_release(struct pipe_inode_info * pipe,struct pipe_buffer * buf)7361 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7362 				    struct pipe_buffer *buf)
7363 {
7364 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7365 
7366 	buffer_ref_release(ref);
7367 	buf->private = 0;
7368 }
7369 
buffer_pipe_buf_get(struct pipe_inode_info * pipe,struct pipe_buffer * buf)7370 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7371 				struct pipe_buffer *buf)
7372 {
7373 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7374 
7375 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7376 		return false;
7377 
7378 	refcount_inc(&ref->refcount);
7379 	return true;
7380 }
7381 
7382 /* Pipe buffer operations for a buffer. */
7383 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7384 	.confirm		= generic_pipe_buf_confirm,
7385 	.release		= buffer_pipe_buf_release,
7386 	.steal			= generic_pipe_buf_nosteal,
7387 	.get			= buffer_pipe_buf_get,
7388 };
7389 
7390 /*
7391  * Callback from splice_to_pipe(), if we need to release some pages
7392  * at the end of the spd in case we error'ed out in filling the pipe.
7393  */
buffer_spd_release(struct splice_pipe_desc * spd,unsigned int i)7394 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7395 {
7396 	struct buffer_ref *ref =
7397 		(struct buffer_ref *)spd->partial[i].private;
7398 
7399 	buffer_ref_release(ref);
7400 	spd->partial[i].private = 0;
7401 }
7402 
7403 static ssize_t
tracing_buffers_splice_read(struct file * file,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)7404 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7405 			    struct pipe_inode_info *pipe, size_t len,
7406 			    unsigned int flags)
7407 {
7408 	struct ftrace_buffer_info *info = file->private_data;
7409 	struct trace_iterator *iter = &info->iter;
7410 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7411 	struct page *pages_def[PIPE_DEF_BUFFERS];
7412 	struct splice_pipe_desc spd = {
7413 		.pages		= pages_def,
7414 		.partial	= partial_def,
7415 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7416 		.ops		= &buffer_pipe_buf_ops,
7417 		.spd_release	= buffer_spd_release,
7418 	};
7419 	struct buffer_ref *ref;
7420 	int entries, i;
7421 	ssize_t ret = 0;
7422 
7423 #ifdef CONFIG_TRACER_MAX_TRACE
7424 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7425 		return -EBUSY;
7426 #endif
7427 
7428 	if (*ppos & (PAGE_SIZE - 1))
7429 		return -EINVAL;
7430 
7431 	if (len & (PAGE_SIZE - 1)) {
7432 		if (len < PAGE_SIZE)
7433 			return -EINVAL;
7434 		len &= PAGE_MASK;
7435 	}
7436 
7437 	if (splice_grow_spd(pipe, &spd))
7438 		return -ENOMEM;
7439 
7440  again:
7441 	trace_access_lock(iter->cpu_file);
7442 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7443 
7444 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7445 		struct page *page;
7446 		int r;
7447 
7448 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7449 		if (!ref) {
7450 			ret = -ENOMEM;
7451 			break;
7452 		}
7453 
7454 		refcount_set(&ref->refcount, 1);
7455 		ref->buffer = iter->trace_buffer->buffer;
7456 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7457 		if (IS_ERR(ref->page)) {
7458 			ret = PTR_ERR(ref->page);
7459 			ref->page = NULL;
7460 			kfree(ref);
7461 			break;
7462 		}
7463 		ref->cpu = iter->cpu_file;
7464 
7465 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7466 					  len, iter->cpu_file, 1);
7467 		if (r < 0) {
7468 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7469 						   ref->page);
7470 			kfree(ref);
7471 			break;
7472 		}
7473 
7474 		page = virt_to_page(ref->page);
7475 
7476 		spd.pages[i] = page;
7477 		spd.partial[i].len = PAGE_SIZE;
7478 		spd.partial[i].offset = 0;
7479 		spd.partial[i].private = (unsigned long)ref;
7480 		spd.nr_pages++;
7481 		*ppos += PAGE_SIZE;
7482 
7483 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7484 	}
7485 
7486 	trace_access_unlock(iter->cpu_file);
7487 	spd.nr_pages = i;
7488 
7489 	/* did we read anything? */
7490 	if (!spd.nr_pages) {
7491 		if (ret)
7492 			goto out;
7493 
7494 		ret = -EAGAIN;
7495 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7496 			goto out;
7497 
7498 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7499 		if (ret)
7500 			goto out;
7501 
7502 		goto again;
7503 	}
7504 
7505 	ret = splice_to_pipe(pipe, &spd);
7506 out:
7507 	splice_shrink_spd(&spd);
7508 
7509 	return ret;
7510 }
7511 
7512 static const struct file_operations tracing_buffers_fops = {
7513 	.open		= tracing_buffers_open,
7514 	.read		= tracing_buffers_read,
7515 	.poll		= tracing_buffers_poll,
7516 	.release	= tracing_buffers_release,
7517 	.splice_read	= tracing_buffers_splice_read,
7518 	.llseek		= no_llseek,
7519 };
7520 
7521 static ssize_t
tracing_stats_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)7522 tracing_stats_read(struct file *filp, char __user *ubuf,
7523 		   size_t count, loff_t *ppos)
7524 {
7525 	struct inode *inode = file_inode(filp);
7526 	struct trace_array *tr = inode->i_private;
7527 	struct trace_buffer *trace_buf = &tr->trace_buffer;
7528 	int cpu = tracing_get_cpu(inode);
7529 	struct trace_seq *s;
7530 	unsigned long cnt;
7531 	unsigned long long t;
7532 	unsigned long usec_rem;
7533 
7534 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7535 	if (!s)
7536 		return -ENOMEM;
7537 
7538 	trace_seq_init(s);
7539 
7540 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7541 	trace_seq_printf(s, "entries: %ld\n", cnt);
7542 
7543 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7544 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7545 
7546 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7547 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7548 
7549 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7550 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7551 
7552 	if (trace_clocks[tr->clock_id].in_ns) {
7553 		/* local or global for trace_clock */
7554 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7555 		usec_rem = do_div(t, USEC_PER_SEC);
7556 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7557 								t, usec_rem);
7558 
7559 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7560 		usec_rem = do_div(t, USEC_PER_SEC);
7561 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7562 	} else {
7563 		/* counter or tsc mode for trace_clock */
7564 		trace_seq_printf(s, "oldest event ts: %llu\n",
7565 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7566 
7567 		trace_seq_printf(s, "now ts: %llu\n",
7568 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7569 	}
7570 
7571 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7572 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7573 
7574 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7575 	trace_seq_printf(s, "read events: %ld\n", cnt);
7576 
7577 	count = simple_read_from_buffer(ubuf, count, ppos,
7578 					s->buffer, trace_seq_used(s));
7579 
7580 	kfree(s);
7581 
7582 	return count;
7583 }
7584 
7585 static const struct file_operations tracing_stats_fops = {
7586 	.open		= tracing_open_generic_tr,
7587 	.read		= tracing_stats_read,
7588 	.llseek		= generic_file_llseek,
7589 	.release	= tracing_release_generic_tr,
7590 };
7591 
7592 #ifdef CONFIG_DYNAMIC_FTRACE
7593 
7594 static ssize_t
tracing_read_dyn_info(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7595 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7596 		  size_t cnt, loff_t *ppos)
7597 {
7598 	unsigned long *p = filp->private_data;
7599 	char buf[64]; /* Not too big for a shallow stack */
7600 	int r;
7601 
7602 	r = scnprintf(buf, 63, "%ld", *p);
7603 	buf[r++] = '\n';
7604 
7605 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7606 }
7607 
7608 static const struct file_operations tracing_dyn_info_fops = {
7609 	.open		= tracing_open_generic,
7610 	.read		= tracing_read_dyn_info,
7611 	.llseek		= generic_file_llseek,
7612 };
7613 #endif /* CONFIG_DYNAMIC_FTRACE */
7614 
7615 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7616 static void
ftrace_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)7617 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7618 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7619 		void *data)
7620 {
7621 	tracing_snapshot_instance(tr);
7622 }
7623 
7624 static void
ftrace_count_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)7625 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7626 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7627 		      void *data)
7628 {
7629 	struct ftrace_func_mapper *mapper = data;
7630 	long *count = NULL;
7631 
7632 	if (mapper)
7633 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7634 
7635 	if (count) {
7636 
7637 		if (*count <= 0)
7638 			return;
7639 
7640 		(*count)--;
7641 	}
7642 
7643 	tracing_snapshot_instance(tr);
7644 }
7645 
7646 static int
ftrace_snapshot_print(struct seq_file * m,unsigned long ip,struct ftrace_probe_ops * ops,void * data)7647 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7648 		      struct ftrace_probe_ops *ops, void *data)
7649 {
7650 	struct ftrace_func_mapper *mapper = data;
7651 	long *count = NULL;
7652 
7653 	seq_printf(m, "%ps:", (void *)ip);
7654 
7655 	seq_puts(m, "snapshot");
7656 
7657 	if (mapper)
7658 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7659 
7660 	if (count)
7661 		seq_printf(m, ":count=%ld\n", *count);
7662 	else
7663 		seq_puts(m, ":unlimited\n");
7664 
7665 	return 0;
7666 }
7667 
7668 static int
ftrace_snapshot_init(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * init_data,void ** data)7669 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7670 		     unsigned long ip, void *init_data, void **data)
7671 {
7672 	struct ftrace_func_mapper *mapper = *data;
7673 
7674 	if (!mapper) {
7675 		mapper = allocate_ftrace_func_mapper();
7676 		if (!mapper)
7677 			return -ENOMEM;
7678 		*data = mapper;
7679 	}
7680 
7681 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7682 }
7683 
7684 static void
ftrace_snapshot_free(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * data)7685 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7686 		     unsigned long ip, void *data)
7687 {
7688 	struct ftrace_func_mapper *mapper = data;
7689 
7690 	if (!ip) {
7691 		if (!mapper)
7692 			return;
7693 		free_ftrace_func_mapper(mapper, NULL);
7694 		return;
7695 	}
7696 
7697 	ftrace_func_mapper_remove_ip(mapper, ip);
7698 }
7699 
7700 static struct ftrace_probe_ops snapshot_probe_ops = {
7701 	.func			= ftrace_snapshot,
7702 	.print			= ftrace_snapshot_print,
7703 };
7704 
7705 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7706 	.func			= ftrace_count_snapshot,
7707 	.print			= ftrace_snapshot_print,
7708 	.init			= ftrace_snapshot_init,
7709 	.free			= ftrace_snapshot_free,
7710 };
7711 
7712 static int
ftrace_trace_snapshot_callback(struct trace_array * tr,struct ftrace_hash * hash,char * glob,char * cmd,char * param,int enable)7713 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7714 			       char *glob, char *cmd, char *param, int enable)
7715 {
7716 	struct ftrace_probe_ops *ops;
7717 	void *count = (void *)-1;
7718 	char *number;
7719 	int ret;
7720 
7721 	if (!tr)
7722 		return -ENODEV;
7723 
7724 	/* hash funcs only work with set_ftrace_filter */
7725 	if (!enable)
7726 		return -EINVAL;
7727 
7728 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7729 
7730 	if (glob[0] == '!')
7731 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7732 
7733 	if (!param)
7734 		goto out_reg;
7735 
7736 	number = strsep(&param, ":");
7737 
7738 	if (!strlen(number))
7739 		goto out_reg;
7740 
7741 	/*
7742 	 * We use the callback data field (which is a pointer)
7743 	 * as our counter.
7744 	 */
7745 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7746 	if (ret)
7747 		return ret;
7748 
7749  out_reg:
7750 	ret = tracing_alloc_snapshot_instance(tr);
7751 	if (ret < 0)
7752 		goto out;
7753 
7754 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7755 
7756  out:
7757 	return ret < 0 ? ret : 0;
7758 }
7759 
7760 static struct ftrace_func_command ftrace_snapshot_cmd = {
7761 	.name			= "snapshot",
7762 	.func			= ftrace_trace_snapshot_callback,
7763 };
7764 
register_snapshot_cmd(void)7765 static __init int register_snapshot_cmd(void)
7766 {
7767 	return register_ftrace_command(&ftrace_snapshot_cmd);
7768 }
7769 #else
register_snapshot_cmd(void)7770 static inline __init int register_snapshot_cmd(void) { return 0; }
7771 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7772 
tracing_get_dentry(struct trace_array * tr)7773 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7774 {
7775 	if (WARN_ON(!tr->dir))
7776 		return ERR_PTR(-ENODEV);
7777 
7778 	/* Top directory uses NULL as the parent */
7779 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7780 		return NULL;
7781 
7782 	/* All sub buffers have a descriptor */
7783 	return tr->dir;
7784 }
7785 
tracing_dentry_percpu(struct trace_array * tr,int cpu)7786 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7787 {
7788 	struct dentry *d_tracer;
7789 
7790 	if (tr->percpu_dir)
7791 		return tr->percpu_dir;
7792 
7793 	d_tracer = tracing_get_dentry(tr);
7794 	if (IS_ERR(d_tracer))
7795 		return NULL;
7796 
7797 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7798 
7799 	WARN_ONCE(!tr->percpu_dir,
7800 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7801 
7802 	return tr->percpu_dir;
7803 }
7804 
7805 static struct dentry *
trace_create_cpu_file(const char * name,umode_t mode,struct dentry * parent,void * data,long cpu,const struct file_operations * fops)7806 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7807 		      void *data, long cpu, const struct file_operations *fops)
7808 {
7809 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7810 
7811 	if (ret) /* See tracing_get_cpu() */
7812 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7813 	return ret;
7814 }
7815 
7816 static void
tracing_init_tracefs_percpu(struct trace_array * tr,long cpu)7817 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7818 {
7819 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7820 	struct dentry *d_cpu;
7821 	char cpu_dir[30]; /* 30 characters should be more than enough */
7822 
7823 	if (!d_percpu)
7824 		return;
7825 
7826 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7827 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7828 	if (!d_cpu) {
7829 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7830 		return;
7831 	}
7832 
7833 	/* per cpu trace_pipe */
7834 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7835 				tr, cpu, &tracing_pipe_fops);
7836 
7837 	/* per cpu trace */
7838 	trace_create_cpu_file("trace", 0644, d_cpu,
7839 				tr, cpu, &tracing_fops);
7840 
7841 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7842 				tr, cpu, &tracing_buffers_fops);
7843 
7844 	trace_create_cpu_file("stats", 0444, d_cpu,
7845 				tr, cpu, &tracing_stats_fops);
7846 
7847 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7848 				tr, cpu, &tracing_entries_fops);
7849 
7850 #ifdef CONFIG_TRACER_SNAPSHOT
7851 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7852 				tr, cpu, &snapshot_fops);
7853 
7854 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7855 				tr, cpu, &snapshot_raw_fops);
7856 #endif
7857 }
7858 
7859 #ifdef CONFIG_FTRACE_SELFTEST
7860 /* Let selftest have access to static functions in this file */
7861 #include "trace_selftest.c"
7862 #endif
7863 
7864 static ssize_t
trace_options_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7865 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7866 			loff_t *ppos)
7867 {
7868 	struct trace_option_dentry *topt = filp->private_data;
7869 	char *buf;
7870 
7871 	if (topt->flags->val & topt->opt->bit)
7872 		buf = "1\n";
7873 	else
7874 		buf = "0\n";
7875 
7876 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7877 }
7878 
7879 static ssize_t
trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7880 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7881 			 loff_t *ppos)
7882 {
7883 	struct trace_option_dentry *topt = filp->private_data;
7884 	unsigned long val;
7885 	int ret;
7886 
7887 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7888 	if (ret)
7889 		return ret;
7890 
7891 	if (val != 0 && val != 1)
7892 		return -EINVAL;
7893 
7894 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7895 		mutex_lock(&trace_types_lock);
7896 		ret = __set_tracer_option(topt->tr, topt->flags,
7897 					  topt->opt, !val);
7898 		mutex_unlock(&trace_types_lock);
7899 		if (ret)
7900 			return ret;
7901 	}
7902 
7903 	*ppos += cnt;
7904 
7905 	return cnt;
7906 }
7907 
7908 
7909 static const struct file_operations trace_options_fops = {
7910 	.open = tracing_open_generic,
7911 	.read = trace_options_read,
7912 	.write = trace_options_write,
7913 	.llseek	= generic_file_llseek,
7914 };
7915 
7916 /*
7917  * In order to pass in both the trace_array descriptor as well as the index
7918  * to the flag that the trace option file represents, the trace_array
7919  * has a character array of trace_flags_index[], which holds the index
7920  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7921  * The address of this character array is passed to the flag option file
7922  * read/write callbacks.
7923  *
7924  * In order to extract both the index and the trace_array descriptor,
7925  * get_tr_index() uses the following algorithm.
7926  *
7927  *   idx = *ptr;
7928  *
7929  * As the pointer itself contains the address of the index (remember
7930  * index[1] == 1).
7931  *
7932  * Then to get the trace_array descriptor, by subtracting that index
7933  * from the ptr, we get to the start of the index itself.
7934  *
7935  *   ptr - idx == &index[0]
7936  *
7937  * Then a simple container_of() from that pointer gets us to the
7938  * trace_array descriptor.
7939  */
get_tr_index(void * data,struct trace_array ** ptr,unsigned int * pindex)7940 static void get_tr_index(void *data, struct trace_array **ptr,
7941 			 unsigned int *pindex)
7942 {
7943 	*pindex = *(unsigned char *)data;
7944 
7945 	*ptr = container_of(data - *pindex, struct trace_array,
7946 			    trace_flags_index);
7947 }
7948 
7949 static ssize_t
trace_options_core_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7950 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7951 			loff_t *ppos)
7952 {
7953 	void *tr_index = filp->private_data;
7954 	struct trace_array *tr;
7955 	unsigned int index;
7956 	char *buf;
7957 
7958 	get_tr_index(tr_index, &tr, &index);
7959 
7960 	if (tr->trace_flags & (1 << index))
7961 		buf = "1\n";
7962 	else
7963 		buf = "0\n";
7964 
7965 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7966 }
7967 
7968 static ssize_t
trace_options_core_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7969 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7970 			 loff_t *ppos)
7971 {
7972 	void *tr_index = filp->private_data;
7973 	struct trace_array *tr;
7974 	unsigned int index;
7975 	unsigned long val;
7976 	int ret;
7977 
7978 	get_tr_index(tr_index, &tr, &index);
7979 
7980 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7981 	if (ret)
7982 		return ret;
7983 
7984 	if (val != 0 && val != 1)
7985 		return -EINVAL;
7986 
7987 	mutex_lock(&event_mutex);
7988 	mutex_lock(&trace_types_lock);
7989 	ret = set_tracer_flag(tr, 1 << index, val);
7990 	mutex_unlock(&trace_types_lock);
7991 	mutex_unlock(&event_mutex);
7992 
7993 	if (ret < 0)
7994 		return ret;
7995 
7996 	*ppos += cnt;
7997 
7998 	return cnt;
7999 }
8000 
8001 static const struct file_operations trace_options_core_fops = {
8002 	.open = tracing_open_generic,
8003 	.read = trace_options_core_read,
8004 	.write = trace_options_core_write,
8005 	.llseek = generic_file_llseek,
8006 };
8007 
trace_create_file(const char * name,umode_t mode,struct dentry * parent,void * data,const struct file_operations * fops)8008 struct dentry *trace_create_file(const char *name,
8009 				 umode_t mode,
8010 				 struct dentry *parent,
8011 				 void *data,
8012 				 const struct file_operations *fops)
8013 {
8014 	struct dentry *ret;
8015 
8016 	ret = tracefs_create_file(name, mode, parent, data, fops);
8017 	if (!ret)
8018 		pr_warn("Could not create tracefs '%s' entry\n", name);
8019 
8020 	return ret;
8021 }
8022 
8023 
trace_options_init_dentry(struct trace_array * tr)8024 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8025 {
8026 	struct dentry *d_tracer;
8027 
8028 	if (tr->options)
8029 		return tr->options;
8030 
8031 	d_tracer = tracing_get_dentry(tr);
8032 	if (IS_ERR(d_tracer))
8033 		return NULL;
8034 
8035 	tr->options = tracefs_create_dir("options", d_tracer);
8036 	if (!tr->options) {
8037 		pr_warn("Could not create tracefs directory 'options'\n");
8038 		return NULL;
8039 	}
8040 
8041 	return tr->options;
8042 }
8043 
8044 static void
create_trace_option_file(struct trace_array * tr,struct trace_option_dentry * topt,struct tracer_flags * flags,struct tracer_opt * opt)8045 create_trace_option_file(struct trace_array *tr,
8046 			 struct trace_option_dentry *topt,
8047 			 struct tracer_flags *flags,
8048 			 struct tracer_opt *opt)
8049 {
8050 	struct dentry *t_options;
8051 
8052 	t_options = trace_options_init_dentry(tr);
8053 	if (!t_options)
8054 		return;
8055 
8056 	topt->flags = flags;
8057 	topt->opt = opt;
8058 	topt->tr = tr;
8059 
8060 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8061 				    &trace_options_fops);
8062 
8063 }
8064 
8065 static void
create_trace_option_files(struct trace_array * tr,struct tracer * tracer)8066 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8067 {
8068 	struct trace_option_dentry *topts;
8069 	struct trace_options *tr_topts;
8070 	struct tracer_flags *flags;
8071 	struct tracer_opt *opts;
8072 	int cnt;
8073 	int i;
8074 
8075 	if (!tracer)
8076 		return;
8077 
8078 	flags = tracer->flags;
8079 
8080 	if (!flags || !flags->opts)
8081 		return;
8082 
8083 	/*
8084 	 * If this is an instance, only create flags for tracers
8085 	 * the instance may have.
8086 	 */
8087 	if (!trace_ok_for_array(tracer, tr))
8088 		return;
8089 
8090 	for (i = 0; i < tr->nr_topts; i++) {
8091 		/* Make sure there's no duplicate flags. */
8092 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8093 			return;
8094 	}
8095 
8096 	opts = flags->opts;
8097 
8098 	for (cnt = 0; opts[cnt].name; cnt++)
8099 		;
8100 
8101 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8102 	if (!topts)
8103 		return;
8104 
8105 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8106 			    GFP_KERNEL);
8107 	if (!tr_topts) {
8108 		kfree(topts);
8109 		return;
8110 	}
8111 
8112 	tr->topts = tr_topts;
8113 	tr->topts[tr->nr_topts].tracer = tracer;
8114 	tr->topts[tr->nr_topts].topts = topts;
8115 	tr->nr_topts++;
8116 
8117 	for (cnt = 0; opts[cnt].name; cnt++) {
8118 		create_trace_option_file(tr, &topts[cnt], flags,
8119 					 &opts[cnt]);
8120 		WARN_ONCE(topts[cnt].entry == NULL,
8121 			  "Failed to create trace option: %s",
8122 			  opts[cnt].name);
8123 	}
8124 }
8125 
8126 static struct dentry *
create_trace_option_core_file(struct trace_array * tr,const char * option,long index)8127 create_trace_option_core_file(struct trace_array *tr,
8128 			      const char *option, long index)
8129 {
8130 	struct dentry *t_options;
8131 
8132 	t_options = trace_options_init_dentry(tr);
8133 	if (!t_options)
8134 		return NULL;
8135 
8136 	return trace_create_file(option, 0644, t_options,
8137 				 (void *)&tr->trace_flags_index[index],
8138 				 &trace_options_core_fops);
8139 }
8140 
create_trace_options_dir(struct trace_array * tr)8141 static void create_trace_options_dir(struct trace_array *tr)
8142 {
8143 	struct dentry *t_options;
8144 	bool top_level = tr == &global_trace;
8145 	int i;
8146 
8147 	t_options = trace_options_init_dentry(tr);
8148 	if (!t_options)
8149 		return;
8150 
8151 	for (i = 0; trace_options[i]; i++) {
8152 		if (top_level ||
8153 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8154 			create_trace_option_core_file(tr, trace_options[i], i);
8155 	}
8156 }
8157 
8158 static ssize_t
rb_simple_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8159 rb_simple_read(struct file *filp, char __user *ubuf,
8160 	       size_t cnt, loff_t *ppos)
8161 {
8162 	struct trace_array *tr = filp->private_data;
8163 	char buf[64];
8164 	int r;
8165 
8166 	r = tracer_tracing_is_on(tr);
8167 	r = sprintf(buf, "%d\n", r);
8168 
8169 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8170 }
8171 
8172 static ssize_t
rb_simple_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)8173 rb_simple_write(struct file *filp, const char __user *ubuf,
8174 		size_t cnt, loff_t *ppos)
8175 {
8176 	struct trace_array *tr = filp->private_data;
8177 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
8178 	unsigned long val;
8179 	int ret;
8180 
8181 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8182 	if (ret)
8183 		return ret;
8184 
8185 	if (buffer) {
8186 		mutex_lock(&trace_types_lock);
8187 		if (!!val == tracer_tracing_is_on(tr)) {
8188 			val = 0; /* do nothing */
8189 		} else if (val) {
8190 			tracer_tracing_on(tr);
8191 			if (tr->current_trace->start)
8192 				tr->current_trace->start(tr);
8193 		} else {
8194 			tracer_tracing_off(tr);
8195 			if (tr->current_trace->stop)
8196 				tr->current_trace->stop(tr);
8197 		}
8198 		mutex_unlock(&trace_types_lock);
8199 	}
8200 
8201 	(*ppos)++;
8202 
8203 	return cnt;
8204 }
8205 
8206 static const struct file_operations rb_simple_fops = {
8207 	.open		= tracing_open_generic_tr,
8208 	.read		= rb_simple_read,
8209 	.write		= rb_simple_write,
8210 	.release	= tracing_release_generic_tr,
8211 	.llseek		= default_llseek,
8212 };
8213 
8214 static ssize_t
buffer_percent_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8215 buffer_percent_read(struct file *filp, char __user *ubuf,
8216 		    size_t cnt, loff_t *ppos)
8217 {
8218 	struct trace_array *tr = filp->private_data;
8219 	char buf[64];
8220 	int r;
8221 
8222 	r = tr->buffer_percent;
8223 	r = sprintf(buf, "%d\n", r);
8224 
8225 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8226 }
8227 
8228 static ssize_t
buffer_percent_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)8229 buffer_percent_write(struct file *filp, const char __user *ubuf,
8230 		     size_t cnt, loff_t *ppos)
8231 {
8232 	struct trace_array *tr = filp->private_data;
8233 	unsigned long val;
8234 	int ret;
8235 
8236 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8237 	if (ret)
8238 		return ret;
8239 
8240 	if (val > 100)
8241 		return -EINVAL;
8242 
8243 	if (!val)
8244 		val = 1;
8245 
8246 	tr->buffer_percent = val;
8247 
8248 	(*ppos)++;
8249 
8250 	return cnt;
8251 }
8252 
8253 static const struct file_operations buffer_percent_fops = {
8254 	.open		= tracing_open_generic_tr,
8255 	.read		= buffer_percent_read,
8256 	.write		= buffer_percent_write,
8257 	.release	= tracing_release_generic_tr,
8258 	.llseek		= default_llseek,
8259 };
8260 
8261 static struct dentry *trace_instance_dir;
8262 
8263 static void
8264 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8265 
8266 static int
allocate_trace_buffer(struct trace_array * tr,struct trace_buffer * buf,int size)8267 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
8268 {
8269 	enum ring_buffer_flags rb_flags;
8270 
8271 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8272 
8273 	buf->tr = tr;
8274 
8275 	buf->buffer = ring_buffer_alloc(size, rb_flags);
8276 	if (!buf->buffer)
8277 		return -ENOMEM;
8278 
8279 	buf->data = alloc_percpu(struct trace_array_cpu);
8280 	if (!buf->data) {
8281 		ring_buffer_free(buf->buffer);
8282 		buf->buffer = NULL;
8283 		return -ENOMEM;
8284 	}
8285 
8286 	/* Allocate the first page for all buffers */
8287 	set_buffer_entries(&tr->trace_buffer,
8288 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
8289 
8290 	return 0;
8291 }
8292 
allocate_trace_buffers(struct trace_array * tr,int size)8293 static int allocate_trace_buffers(struct trace_array *tr, int size)
8294 {
8295 	int ret;
8296 
8297 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
8298 	if (ret)
8299 		return ret;
8300 
8301 #ifdef CONFIG_TRACER_MAX_TRACE
8302 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
8303 				    allocate_snapshot ? size : 1);
8304 	if (WARN_ON(ret)) {
8305 		ring_buffer_free(tr->trace_buffer.buffer);
8306 		tr->trace_buffer.buffer = NULL;
8307 		free_percpu(tr->trace_buffer.data);
8308 		tr->trace_buffer.data = NULL;
8309 		return -ENOMEM;
8310 	}
8311 	tr->allocated_snapshot = allocate_snapshot;
8312 
8313 	/*
8314 	 * Only the top level trace array gets its snapshot allocated
8315 	 * from the kernel command line.
8316 	 */
8317 	allocate_snapshot = false;
8318 #endif
8319 	return 0;
8320 }
8321 
free_trace_buffer(struct trace_buffer * buf)8322 static void free_trace_buffer(struct trace_buffer *buf)
8323 {
8324 	if (buf->buffer) {
8325 		ring_buffer_free(buf->buffer);
8326 		buf->buffer = NULL;
8327 		free_percpu(buf->data);
8328 		buf->data = NULL;
8329 	}
8330 }
8331 
free_trace_buffers(struct trace_array * tr)8332 static void free_trace_buffers(struct trace_array *tr)
8333 {
8334 	if (!tr)
8335 		return;
8336 
8337 	free_trace_buffer(&tr->trace_buffer);
8338 
8339 #ifdef CONFIG_TRACER_MAX_TRACE
8340 	free_trace_buffer(&tr->max_buffer);
8341 #endif
8342 }
8343 
init_trace_flags_index(struct trace_array * tr)8344 static void init_trace_flags_index(struct trace_array *tr)
8345 {
8346 	int i;
8347 
8348 	/* Used by the trace options files */
8349 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8350 		tr->trace_flags_index[i] = i;
8351 }
8352 
__update_tracer_options(struct trace_array * tr)8353 static void __update_tracer_options(struct trace_array *tr)
8354 {
8355 	struct tracer *t;
8356 
8357 	for (t = trace_types; t; t = t->next)
8358 		add_tracer_options(tr, t);
8359 }
8360 
update_tracer_options(struct trace_array * tr)8361 static void update_tracer_options(struct trace_array *tr)
8362 {
8363 	mutex_lock(&trace_types_lock);
8364 	__update_tracer_options(tr);
8365 	mutex_unlock(&trace_types_lock);
8366 }
8367 
trace_array_create(const char * name)8368 struct trace_array *trace_array_create(const char *name)
8369 {
8370 	struct trace_array *tr;
8371 	int ret;
8372 
8373 	mutex_lock(&event_mutex);
8374 	mutex_lock(&trace_types_lock);
8375 
8376 	ret = -EEXIST;
8377 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8378 		if (tr->name && strcmp(tr->name, name) == 0)
8379 			goto out_unlock;
8380 	}
8381 
8382 	ret = -ENOMEM;
8383 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8384 	if (!tr)
8385 		goto out_unlock;
8386 
8387 	tr->name = kstrdup(name, GFP_KERNEL);
8388 	if (!tr->name)
8389 		goto out_free_tr;
8390 
8391 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8392 		goto out_free_tr;
8393 
8394 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8395 
8396 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8397 
8398 	raw_spin_lock_init(&tr->start_lock);
8399 
8400 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8401 
8402 	tr->current_trace = &nop_trace;
8403 
8404 	INIT_LIST_HEAD(&tr->systems);
8405 	INIT_LIST_HEAD(&tr->events);
8406 	INIT_LIST_HEAD(&tr->hist_vars);
8407 	INIT_LIST_HEAD(&tr->err_log);
8408 
8409 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8410 		goto out_free_tr;
8411 
8412 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
8413 	if (!tr->dir)
8414 		goto out_free_tr;
8415 
8416 	ret = event_trace_add_tracer(tr->dir, tr);
8417 	if (ret) {
8418 		tracefs_remove_recursive(tr->dir);
8419 		goto out_free_tr;
8420 	}
8421 
8422 	ftrace_init_trace_array(tr);
8423 
8424 	init_tracer_tracefs(tr, tr->dir);
8425 	init_trace_flags_index(tr);
8426 	__update_tracer_options(tr);
8427 
8428 	list_add(&tr->list, &ftrace_trace_arrays);
8429 
8430 	mutex_unlock(&trace_types_lock);
8431 	mutex_unlock(&event_mutex);
8432 
8433 	return tr;
8434 
8435  out_free_tr:
8436 	free_trace_buffers(tr);
8437 	free_cpumask_var(tr->tracing_cpumask);
8438 	kfree(tr->name);
8439 	kfree(tr);
8440 
8441  out_unlock:
8442 	mutex_unlock(&trace_types_lock);
8443 	mutex_unlock(&event_mutex);
8444 
8445 	return ERR_PTR(ret);
8446 }
8447 EXPORT_SYMBOL_GPL(trace_array_create);
8448 
instance_mkdir(const char * name)8449 static int instance_mkdir(const char *name)
8450 {
8451 	return PTR_ERR_OR_ZERO(trace_array_create(name));
8452 }
8453 
__remove_instance(struct trace_array * tr)8454 static int __remove_instance(struct trace_array *tr)
8455 {
8456 	int i;
8457 
8458 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
8459 		return -EBUSY;
8460 
8461 	list_del(&tr->list);
8462 
8463 	/* Disable all the flags that were enabled coming in */
8464 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8465 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8466 			set_tracer_flag(tr, 1 << i, 0);
8467 	}
8468 
8469 	tracing_set_nop(tr);
8470 	clear_ftrace_function_probes(tr);
8471 	event_trace_del_tracer(tr);
8472 	ftrace_clear_pids(tr);
8473 	ftrace_destroy_function_files(tr);
8474 	tracefs_remove_recursive(tr->dir);
8475 	free_trace_buffers(tr);
8476 
8477 	for (i = 0; i < tr->nr_topts; i++) {
8478 		kfree(tr->topts[i].topts);
8479 	}
8480 	kfree(tr->topts);
8481 
8482 	free_cpumask_var(tr->tracing_cpumask);
8483 	kfree(tr->name);
8484 	kfree(tr);
8485 	tr = NULL;
8486 
8487 	return 0;
8488 }
8489 
trace_array_destroy(struct trace_array * tr)8490 int trace_array_destroy(struct trace_array *tr)
8491 {
8492 	int ret;
8493 
8494 	if (!tr)
8495 		return -EINVAL;
8496 
8497 	mutex_lock(&event_mutex);
8498 	mutex_lock(&trace_types_lock);
8499 
8500 	ret = __remove_instance(tr);
8501 
8502 	mutex_unlock(&trace_types_lock);
8503 	mutex_unlock(&event_mutex);
8504 
8505 	return ret;
8506 }
8507 EXPORT_SYMBOL_GPL(trace_array_destroy);
8508 
instance_rmdir(const char * name)8509 static int instance_rmdir(const char *name)
8510 {
8511 	struct trace_array *tr;
8512 	int ret;
8513 
8514 	mutex_lock(&event_mutex);
8515 	mutex_lock(&trace_types_lock);
8516 
8517 	ret = -ENODEV;
8518 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8519 		if (tr->name && strcmp(tr->name, name) == 0) {
8520 			ret = __remove_instance(tr);
8521 			break;
8522 		}
8523 	}
8524 
8525 	mutex_unlock(&trace_types_lock);
8526 	mutex_unlock(&event_mutex);
8527 
8528 	return ret;
8529 }
8530 
create_trace_instances(struct dentry * d_tracer)8531 static __init void create_trace_instances(struct dentry *d_tracer)
8532 {
8533 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8534 							 instance_mkdir,
8535 							 instance_rmdir);
8536 	if (WARN_ON(!trace_instance_dir))
8537 		return;
8538 }
8539 
8540 static void
init_tracer_tracefs(struct trace_array * tr,struct dentry * d_tracer)8541 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8542 {
8543 	struct trace_event_file *file;
8544 	int cpu;
8545 
8546 	trace_create_file("available_tracers", 0444, d_tracer,
8547 			tr, &show_traces_fops);
8548 
8549 	trace_create_file("current_tracer", 0644, d_tracer,
8550 			tr, &set_tracer_fops);
8551 
8552 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8553 			  tr, &tracing_cpumask_fops);
8554 
8555 	trace_create_file("trace_options", 0644, d_tracer,
8556 			  tr, &tracing_iter_fops);
8557 
8558 	trace_create_file("trace", 0644, d_tracer,
8559 			  tr, &tracing_fops);
8560 
8561 	trace_create_file("trace_pipe", 0444, d_tracer,
8562 			  tr, &tracing_pipe_fops);
8563 
8564 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8565 			  tr, &tracing_entries_fops);
8566 
8567 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8568 			  tr, &tracing_total_entries_fops);
8569 
8570 	trace_create_file("free_buffer", 0200, d_tracer,
8571 			  tr, &tracing_free_buffer_fops);
8572 
8573 	trace_create_file("trace_marker", 0220, d_tracer,
8574 			  tr, &tracing_mark_fops);
8575 
8576 	file = __find_event_file(tr, "ftrace", "print");
8577 	if (file && file->dir)
8578 		trace_create_file("trigger", 0644, file->dir, file,
8579 				  &event_trigger_fops);
8580 	tr->trace_marker_file = file;
8581 
8582 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8583 			  tr, &tracing_mark_raw_fops);
8584 
8585 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8586 			  &trace_clock_fops);
8587 
8588 	trace_create_file("tracing_on", 0644, d_tracer,
8589 			  tr, &rb_simple_fops);
8590 
8591 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8592 			  &trace_time_stamp_mode_fops);
8593 
8594 	tr->buffer_percent = 50;
8595 
8596 	trace_create_file("buffer_percent", 0444, d_tracer,
8597 			tr, &buffer_percent_fops);
8598 
8599 	create_trace_options_dir(tr);
8600 
8601 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8602 	trace_create_file("tracing_max_latency", 0644, d_tracer,
8603 			&tr->max_latency, &tracing_max_lat_fops);
8604 #endif
8605 
8606 	if (ftrace_create_function_files(tr, d_tracer))
8607 		WARN(1, "Could not allocate function filter files");
8608 
8609 #ifdef CONFIG_TRACER_SNAPSHOT
8610 	trace_create_file("snapshot", 0644, d_tracer,
8611 			  tr, &snapshot_fops);
8612 #endif
8613 
8614 	trace_create_file("error_log", 0644, d_tracer,
8615 			  tr, &tracing_err_log_fops);
8616 
8617 	for_each_tracing_cpu(cpu)
8618 		tracing_init_tracefs_percpu(tr, cpu);
8619 
8620 	ftrace_init_tracefs(tr, d_tracer);
8621 }
8622 
trace_automount(struct dentry * mntpt,void * ingore)8623 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
8624 {
8625 	struct vfsmount *mnt;
8626 	struct file_system_type *type;
8627 
8628 	/*
8629 	 * To maintain backward compatibility for tools that mount
8630 	 * debugfs to get to the tracing facility, tracefs is automatically
8631 	 * mounted to the debugfs/tracing directory.
8632 	 */
8633 	type = get_fs_type("tracefs");
8634 	if (!type)
8635 		return NULL;
8636 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8637 	put_filesystem(type);
8638 	if (IS_ERR(mnt))
8639 		return NULL;
8640 	mntget(mnt);
8641 
8642 	return mnt;
8643 }
8644 
8645 /**
8646  * tracing_init_dentry - initialize top level trace array
8647  *
8648  * This is called when creating files or directories in the tracing
8649  * directory. It is called via fs_initcall() by any of the boot up code
8650  * and expects to return the dentry of the top level tracing directory.
8651  */
tracing_init_dentry(void)8652 struct dentry *tracing_init_dentry(void)
8653 {
8654 	struct trace_array *tr = &global_trace;
8655 
8656 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
8657 		pr_warning("Tracing disabled due to lockdown\n");
8658 		return ERR_PTR(-EPERM);
8659 	}
8660 
8661 	/* The top level trace array uses  NULL as parent */
8662 	if (tr->dir)
8663 		return NULL;
8664 
8665 	if (WARN_ON(!tracefs_initialized()) ||
8666 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8667 		 WARN_ON(!debugfs_initialized())))
8668 		return ERR_PTR(-ENODEV);
8669 
8670 	/*
8671 	 * As there may still be users that expect the tracing
8672 	 * files to exist in debugfs/tracing, we must automount
8673 	 * the tracefs file system there, so older tools still
8674 	 * work with the newer kerenl.
8675 	 */
8676 	tr->dir = debugfs_create_automount("tracing", NULL,
8677 					   trace_automount, NULL);
8678 
8679 	return NULL;
8680 }
8681 
8682 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8683 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8684 
trace_eval_init(void)8685 static void __init trace_eval_init(void)
8686 {
8687 	int len;
8688 
8689 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8690 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8691 }
8692 
8693 #ifdef CONFIG_MODULES
trace_module_add_evals(struct module * mod)8694 static void trace_module_add_evals(struct module *mod)
8695 {
8696 	if (!mod->num_trace_evals)
8697 		return;
8698 
8699 	/*
8700 	 * Modules with bad taint do not have events created, do
8701 	 * not bother with enums either.
8702 	 */
8703 	if (trace_module_has_bad_taint(mod))
8704 		return;
8705 
8706 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8707 }
8708 
8709 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
trace_module_remove_evals(struct module * mod)8710 static void trace_module_remove_evals(struct module *mod)
8711 {
8712 	union trace_eval_map_item *map;
8713 	union trace_eval_map_item **last = &trace_eval_maps;
8714 
8715 	if (!mod->num_trace_evals)
8716 		return;
8717 
8718 	mutex_lock(&trace_eval_mutex);
8719 
8720 	map = trace_eval_maps;
8721 
8722 	while (map) {
8723 		if (map->head.mod == mod)
8724 			break;
8725 		map = trace_eval_jmp_to_tail(map);
8726 		last = &map->tail.next;
8727 		map = map->tail.next;
8728 	}
8729 	if (!map)
8730 		goto out;
8731 
8732 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8733 	kfree(map);
8734  out:
8735 	mutex_unlock(&trace_eval_mutex);
8736 }
8737 #else
trace_module_remove_evals(struct module * mod)8738 static inline void trace_module_remove_evals(struct module *mod) { }
8739 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8740 
trace_module_notify(struct notifier_block * self,unsigned long val,void * data)8741 static int trace_module_notify(struct notifier_block *self,
8742 			       unsigned long val, void *data)
8743 {
8744 	struct module *mod = data;
8745 
8746 	switch (val) {
8747 	case MODULE_STATE_COMING:
8748 		trace_module_add_evals(mod);
8749 		break;
8750 	case MODULE_STATE_GOING:
8751 		trace_module_remove_evals(mod);
8752 		break;
8753 	}
8754 
8755 	return 0;
8756 }
8757 
8758 static struct notifier_block trace_module_nb = {
8759 	.notifier_call = trace_module_notify,
8760 	.priority = 0,
8761 };
8762 #endif /* CONFIG_MODULES */
8763 
tracer_init_tracefs(void)8764 static __init int tracer_init_tracefs(void)
8765 {
8766 	struct dentry *d_tracer;
8767 
8768 	trace_access_lock_init();
8769 
8770 	d_tracer = tracing_init_dentry();
8771 	if (IS_ERR(d_tracer))
8772 		return 0;
8773 
8774 	event_trace_init();
8775 
8776 	init_tracer_tracefs(&global_trace, d_tracer);
8777 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8778 
8779 	trace_create_file("tracing_thresh", 0644, d_tracer,
8780 			&global_trace, &tracing_thresh_fops);
8781 
8782 	trace_create_file("README", 0444, d_tracer,
8783 			NULL, &tracing_readme_fops);
8784 
8785 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8786 			NULL, &tracing_saved_cmdlines_fops);
8787 
8788 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8789 			  NULL, &tracing_saved_cmdlines_size_fops);
8790 
8791 	trace_create_file("saved_tgids", 0444, d_tracer,
8792 			NULL, &tracing_saved_tgids_fops);
8793 
8794 	trace_eval_init();
8795 
8796 	trace_create_eval_file(d_tracer);
8797 
8798 #ifdef CONFIG_MODULES
8799 	register_module_notifier(&trace_module_nb);
8800 #endif
8801 
8802 #ifdef CONFIG_DYNAMIC_FTRACE
8803 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8804 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8805 #endif
8806 
8807 	create_trace_instances(d_tracer);
8808 
8809 	update_tracer_options(&global_trace);
8810 
8811 	return 0;
8812 }
8813 
trace_panic_handler(struct notifier_block * this,unsigned long event,void * unused)8814 static int trace_panic_handler(struct notifier_block *this,
8815 			       unsigned long event, void *unused)
8816 {
8817 	if (ftrace_dump_on_oops)
8818 		ftrace_dump(ftrace_dump_on_oops);
8819 	return NOTIFY_OK;
8820 }
8821 
8822 static struct notifier_block trace_panic_notifier = {
8823 	.notifier_call  = trace_panic_handler,
8824 	.next           = NULL,
8825 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8826 };
8827 
trace_die_handler(struct notifier_block * self,unsigned long val,void * data)8828 static int trace_die_handler(struct notifier_block *self,
8829 			     unsigned long val,
8830 			     void *data)
8831 {
8832 	switch (val) {
8833 	case DIE_OOPS:
8834 		if (ftrace_dump_on_oops)
8835 			ftrace_dump(ftrace_dump_on_oops);
8836 		break;
8837 	default:
8838 		break;
8839 	}
8840 	return NOTIFY_OK;
8841 }
8842 
8843 static struct notifier_block trace_die_notifier = {
8844 	.notifier_call = trace_die_handler,
8845 	.priority = 200
8846 };
8847 
8848 /*
8849  * printk is set to max of 1024, we really don't need it that big.
8850  * Nothing should be printing 1000 characters anyway.
8851  */
8852 #define TRACE_MAX_PRINT		1000
8853 
8854 /*
8855  * Define here KERN_TRACE so that we have one place to modify
8856  * it if we decide to change what log level the ftrace dump
8857  * should be at.
8858  */
8859 #define KERN_TRACE		KERN_EMERG
8860 
8861 void
trace_printk_seq(struct trace_seq * s)8862 trace_printk_seq(struct trace_seq *s)
8863 {
8864 	/* Probably should print a warning here. */
8865 	if (s->seq.len >= TRACE_MAX_PRINT)
8866 		s->seq.len = TRACE_MAX_PRINT;
8867 
8868 	/*
8869 	 * More paranoid code. Although the buffer size is set to
8870 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8871 	 * an extra layer of protection.
8872 	 */
8873 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8874 		s->seq.len = s->seq.size - 1;
8875 
8876 	/* should be zero ended, but we are paranoid. */
8877 	s->buffer[s->seq.len] = 0;
8878 
8879 	printk(KERN_TRACE "%s", s->buffer);
8880 
8881 	trace_seq_init(s);
8882 }
8883 
trace_init_global_iter(struct trace_iterator * iter)8884 void trace_init_global_iter(struct trace_iterator *iter)
8885 {
8886 	iter->tr = &global_trace;
8887 	iter->trace = iter->tr->current_trace;
8888 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8889 	iter->trace_buffer = &global_trace.trace_buffer;
8890 
8891 	if (iter->trace && iter->trace->open)
8892 		iter->trace->open(iter);
8893 
8894 	/* Annotate start of buffers if we had overruns */
8895 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8896 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8897 
8898 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8899 	if (trace_clocks[iter->tr->clock_id].in_ns)
8900 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8901 }
8902 
ftrace_dump(enum ftrace_dump_mode oops_dump_mode)8903 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8904 {
8905 	/* use static because iter can be a bit big for the stack */
8906 	static struct trace_iterator iter;
8907 	static atomic_t dump_running;
8908 	struct trace_array *tr = &global_trace;
8909 	unsigned int old_userobj;
8910 	unsigned long flags;
8911 	int cnt = 0, cpu;
8912 
8913 	/* Only allow one dump user at a time. */
8914 	if (atomic_inc_return(&dump_running) != 1) {
8915 		atomic_dec(&dump_running);
8916 		return;
8917 	}
8918 
8919 	/*
8920 	 * Always turn off tracing when we dump.
8921 	 * We don't need to show trace output of what happens
8922 	 * between multiple crashes.
8923 	 *
8924 	 * If the user does a sysrq-z, then they can re-enable
8925 	 * tracing with echo 1 > tracing_on.
8926 	 */
8927 	tracing_off();
8928 
8929 	local_irq_save(flags);
8930 	printk_nmi_direct_enter();
8931 
8932 	/* Simulate the iterator */
8933 	trace_init_global_iter(&iter);
8934 
8935 	for_each_tracing_cpu(cpu) {
8936 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8937 	}
8938 
8939 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8940 
8941 	/* don't look at user memory in panic mode */
8942 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8943 
8944 	switch (oops_dump_mode) {
8945 	case DUMP_ALL:
8946 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8947 		break;
8948 	case DUMP_ORIG:
8949 		iter.cpu_file = raw_smp_processor_id();
8950 		break;
8951 	case DUMP_NONE:
8952 		goto out_enable;
8953 	default:
8954 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8955 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8956 	}
8957 
8958 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8959 
8960 	/* Did function tracer already get disabled? */
8961 	if (ftrace_is_dead()) {
8962 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8963 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8964 	}
8965 
8966 	/*
8967 	 * We need to stop all tracing on all CPUS to read the
8968 	 * the next buffer. This is a bit expensive, but is
8969 	 * not done often. We fill all what we can read,
8970 	 * and then release the locks again.
8971 	 */
8972 
8973 	while (!trace_empty(&iter)) {
8974 
8975 		if (!cnt)
8976 			printk(KERN_TRACE "---------------------------------\n");
8977 
8978 		cnt++;
8979 
8980 		trace_iterator_reset(&iter);
8981 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8982 
8983 		if (trace_find_next_entry_inc(&iter) != NULL) {
8984 			int ret;
8985 
8986 			ret = print_trace_line(&iter);
8987 			if (ret != TRACE_TYPE_NO_CONSUME)
8988 				trace_consume(&iter);
8989 		}
8990 		touch_nmi_watchdog();
8991 
8992 		trace_printk_seq(&iter.seq);
8993 	}
8994 
8995 	if (!cnt)
8996 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8997 	else
8998 		printk(KERN_TRACE "---------------------------------\n");
8999 
9000  out_enable:
9001 	tr->trace_flags |= old_userobj;
9002 
9003 	for_each_tracing_cpu(cpu) {
9004 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
9005 	}
9006 	atomic_dec(&dump_running);
9007 	printk_nmi_direct_exit();
9008 	local_irq_restore(flags);
9009 }
9010 EXPORT_SYMBOL_GPL(ftrace_dump);
9011 
trace_run_command(const char * buf,int (* createfn)(int,char **))9012 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9013 {
9014 	char **argv;
9015 	int argc, ret;
9016 
9017 	argc = 0;
9018 	ret = 0;
9019 	argv = argv_split(GFP_KERNEL, buf, &argc);
9020 	if (!argv)
9021 		return -ENOMEM;
9022 
9023 	if (argc)
9024 		ret = createfn(argc, argv);
9025 
9026 	argv_free(argv);
9027 
9028 	return ret;
9029 }
9030 
9031 #define WRITE_BUFSIZE  4096
9032 
trace_parse_run_command(struct file * file,const char __user * buffer,size_t count,loff_t * ppos,int (* createfn)(int,char **))9033 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9034 				size_t count, loff_t *ppos,
9035 				int (*createfn)(int, char **))
9036 {
9037 	char *kbuf, *buf, *tmp;
9038 	int ret = 0;
9039 	size_t done = 0;
9040 	size_t size;
9041 
9042 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9043 	if (!kbuf)
9044 		return -ENOMEM;
9045 
9046 	while (done < count) {
9047 		size = count - done;
9048 
9049 		if (size >= WRITE_BUFSIZE)
9050 			size = WRITE_BUFSIZE - 1;
9051 
9052 		if (copy_from_user(kbuf, buffer + done, size)) {
9053 			ret = -EFAULT;
9054 			goto out;
9055 		}
9056 		kbuf[size] = '\0';
9057 		buf = kbuf;
9058 		do {
9059 			tmp = strchr(buf, '\n');
9060 			if (tmp) {
9061 				*tmp = '\0';
9062 				size = tmp - buf + 1;
9063 			} else {
9064 				size = strlen(buf);
9065 				if (done + size < count) {
9066 					if (buf != kbuf)
9067 						break;
9068 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9069 					pr_warn("Line length is too long: Should be less than %d\n",
9070 						WRITE_BUFSIZE - 2);
9071 					ret = -EINVAL;
9072 					goto out;
9073 				}
9074 			}
9075 			done += size;
9076 
9077 			/* Remove comments */
9078 			tmp = strchr(buf, '#');
9079 
9080 			if (tmp)
9081 				*tmp = '\0';
9082 
9083 			ret = trace_run_command(buf, createfn);
9084 			if (ret)
9085 				goto out;
9086 			buf += size;
9087 
9088 		} while (done < count);
9089 	}
9090 	ret = done;
9091 
9092 out:
9093 	kfree(kbuf);
9094 
9095 	return ret;
9096 }
9097 
tracer_alloc_buffers(void)9098 __init static int tracer_alloc_buffers(void)
9099 {
9100 	int ring_buf_size;
9101 	int ret = -ENOMEM;
9102 
9103 
9104 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9105 		pr_warning("Tracing disabled due to lockdown\n");
9106 		return -EPERM;
9107 	}
9108 
9109 	/*
9110 	 * Make sure we don't accidently add more trace options
9111 	 * than we have bits for.
9112 	 */
9113 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9114 
9115 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9116 		goto out;
9117 
9118 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9119 		goto out_free_buffer_mask;
9120 
9121 	/* Only allocate trace_printk buffers if a trace_printk exists */
9122 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
9123 		/* Must be called before global_trace.buffer is allocated */
9124 		trace_printk_init_buffers();
9125 
9126 	/* To save memory, keep the ring buffer size to its minimum */
9127 	if (ring_buffer_expanded)
9128 		ring_buf_size = trace_buf_size;
9129 	else
9130 		ring_buf_size = 1;
9131 
9132 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9133 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9134 
9135 	raw_spin_lock_init(&global_trace.start_lock);
9136 
9137 	/*
9138 	 * The prepare callbacks allocates some memory for the ring buffer. We
9139 	 * don't free the buffer if the if the CPU goes down. If we were to free
9140 	 * the buffer, then the user would lose any trace that was in the
9141 	 * buffer. The memory will be removed once the "instance" is removed.
9142 	 */
9143 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9144 				      "trace/RB:preapre", trace_rb_cpu_prepare,
9145 				      NULL);
9146 	if (ret < 0)
9147 		goto out_free_cpumask;
9148 	/* Used for event triggers */
9149 	ret = -ENOMEM;
9150 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9151 	if (!temp_buffer)
9152 		goto out_rm_hp_state;
9153 
9154 	if (trace_create_savedcmd() < 0)
9155 		goto out_free_temp_buffer;
9156 
9157 	/* TODO: make the number of buffers hot pluggable with CPUS */
9158 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9159 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
9160 		WARN_ON(1);
9161 		goto out_free_savedcmd;
9162 	}
9163 
9164 	if (global_trace.buffer_disabled)
9165 		tracing_off();
9166 
9167 	if (trace_boot_clock) {
9168 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
9169 		if (ret < 0)
9170 			pr_warn("Trace clock %s not defined, going back to default\n",
9171 				trace_boot_clock);
9172 	}
9173 
9174 	/*
9175 	 * register_tracer() might reference current_trace, so it
9176 	 * needs to be set before we register anything. This is
9177 	 * just a bootstrap of current_trace anyway.
9178 	 */
9179 	global_trace.current_trace = &nop_trace;
9180 
9181 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9182 
9183 	ftrace_init_global_array_ops(&global_trace);
9184 
9185 	init_trace_flags_index(&global_trace);
9186 
9187 	register_tracer(&nop_trace);
9188 
9189 	/* Function tracing may start here (via kernel command line) */
9190 	init_function_trace();
9191 
9192 	/* All seems OK, enable tracing */
9193 	tracing_disabled = 0;
9194 
9195 	atomic_notifier_chain_register(&panic_notifier_list,
9196 				       &trace_panic_notifier);
9197 
9198 	register_die_notifier(&trace_die_notifier);
9199 
9200 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9201 
9202 	INIT_LIST_HEAD(&global_trace.systems);
9203 	INIT_LIST_HEAD(&global_trace.events);
9204 	INIT_LIST_HEAD(&global_trace.hist_vars);
9205 	INIT_LIST_HEAD(&global_trace.err_log);
9206 	list_add(&global_trace.list, &ftrace_trace_arrays);
9207 
9208 	apply_trace_boot_options();
9209 
9210 	register_snapshot_cmd();
9211 
9212 	return 0;
9213 
9214 out_free_savedcmd:
9215 	free_saved_cmdlines_buffer(savedcmd);
9216 out_free_temp_buffer:
9217 	ring_buffer_free(temp_buffer);
9218 out_rm_hp_state:
9219 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9220 out_free_cpumask:
9221 	free_cpumask_var(global_trace.tracing_cpumask);
9222 out_free_buffer_mask:
9223 	free_cpumask_var(tracing_buffer_mask);
9224 out:
9225 	return ret;
9226 }
9227 
early_trace_init(void)9228 void __init early_trace_init(void)
9229 {
9230 	if (tracepoint_printk) {
9231 		tracepoint_print_iter =
9232 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9233 		if (WARN_ON(!tracepoint_print_iter))
9234 			tracepoint_printk = 0;
9235 		else
9236 			static_key_enable(&tracepoint_printk_key.key);
9237 	}
9238 	tracer_alloc_buffers();
9239 }
9240 
trace_init(void)9241 void __init trace_init(void)
9242 {
9243 	trace_event_init();
9244 }
9245 
clear_boot_tracer(void)9246 __init static int clear_boot_tracer(void)
9247 {
9248 	/*
9249 	 * The default tracer at boot buffer is an init section.
9250 	 * This function is called in lateinit. If we did not
9251 	 * find the boot tracer, then clear it out, to prevent
9252 	 * later registration from accessing the buffer that is
9253 	 * about to be freed.
9254 	 */
9255 	if (!default_bootup_tracer)
9256 		return 0;
9257 
9258 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9259 	       default_bootup_tracer);
9260 	default_bootup_tracer = NULL;
9261 
9262 	return 0;
9263 }
9264 
9265 fs_initcall(tracer_init_tracefs);
9266 late_initcall_sync(clear_boot_tracer);
9267 
9268 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
tracing_set_default_clock(void)9269 __init static int tracing_set_default_clock(void)
9270 {
9271 	/* sched_clock_stable() is determined in late_initcall */
9272 	if (!trace_boot_clock && !sched_clock_stable()) {
9273 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
9274 			pr_warn("Can not set tracing clock due to lockdown\n");
9275 			return -EPERM;
9276 		}
9277 
9278 		printk(KERN_WARNING
9279 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9280 		       "If you want to keep using the local clock, then add:\n"
9281 		       "  \"trace_clock=local\"\n"
9282 		       "on the kernel command line\n");
9283 		tracing_set_clock(&global_trace, "global");
9284 	}
9285 
9286 	return 0;
9287 }
9288 late_initcall_sync(tracing_set_default_clock);
9289 #endif
9290