1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44 
45 #include "trace.h"
46 #include "trace_output.h"
47 
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53 
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will peek into the ring buffer to count the
57  * entries inserted during the selftest, although some concurrent
58  * insertions into the ring buffer, such as trace_printk, could occur
59  * at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62 
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67 
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71 
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74 	{ }
75 };
76 
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80 	return 0;
81 }
82 
83 /*
84  * To prevent the comm cache from being overwritten when no
85  * tracing is active, only save the comm when a trace event
86  * occurred.
87  */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89 
90 /*
91  * Kill all tracing for good (never come back).
92  * It is initialized to 1 but will turn to zero if the initialization
93  * of the tracer is successful. But that is the only place that sets
94  * this back to zero.
95  */
96 static int tracing_disabled = 1;
97 
98 cpumask_var_t __read_mostly	tracing_buffer_mask;
99 
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops
112  * Set 1 if you want to dump buffers of all CPUs
113  * Set 2 if you want to dump the buffer of the CPU that triggered oops
114  */
115 
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117 
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120 
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124 	struct module			*mod;
125 	unsigned long			length;
126 };
127 
128 union trace_enum_map_item;
129 
130 struct trace_enum_map_tail {
131 	/*
132 	 * "end" is first and points to NULL as it must be different
133 	 * than "mod" or "enum_string"
134 	 */
135 	union trace_enum_map_item	*next;
136 	const char			*end;	/* points to NULL */
137 };
138 
139 static DEFINE_MUTEX(trace_enum_mutex);
140 
141 /*
142  * The trace_enum_maps are saved in an array with two extra elements,
143  * one at the beginning, and one at the end. The beginning item contains
144  * the count of the saved maps (head.length), and the module they
145  * belong to if not built in (head.mod). The ending item contains a
146  * pointer to the next array of saved enum_map items.
147  */
148 union trace_enum_map_item {
149 	struct trace_enum_map		map;
150 	struct trace_enum_map_head	head;
151 	struct trace_enum_map_tail	tail;
152 };
153 
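/*
 * A rough sketch of that layout for an array saving N maps:
 *
 *   item[0]      head  (.mod = owning module or NULL, .length = N)
 *   item[1..N]   map   (the saved enum maps themselves)
 *   item[N+1]    tail  (.next = next saved array or NULL, .end = NULL)
 */
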
154 static union trace_enum_map_item *trace_enum_maps;
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156 
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158 
159 #define MAX_TRACER_SIZE		100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162 
163 static bool allocate_snapshot;
164 
165 static int __init set_cmdline_ftrace(char *str)
166 {
167 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168 	default_bootup_tracer = bootup_tracer_buf;
169 	/* We are using ftrace early, expand it */
170 	ring_buffer_expanded = true;
171 	return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174 
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177 	if (*str++ != '=' || !*str) {
178 		ftrace_dump_on_oops = DUMP_ALL;
179 		return 1;
180 	}
181 
182 	if (!strcmp("orig_cpu", str)) {
183 		ftrace_dump_on_oops = DUMP_ORIG;
184 		return 1;
185 	}
186 
187 	return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
190 
191 static int __init stop_trace_on_warning(char *str)
192 {
193 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194 		__disable_trace_on_warning = 1;
195 	return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198 
199 static int __init boot_alloc_snapshot(char *str)
200 {
201 	allocate_snapshot = true;
202 	/* We also need the main ring buffer expanded */
203 	ring_buffer_expanded = true;
204 	return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207 
208 
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210 
211 static int __init set_trace_boot_options(char *str)
212 {
213 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214 	return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217 
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220 
221 static int __init set_trace_boot_clock(char *str)
222 {
223 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224 	trace_boot_clock = trace_boot_clock_buf;
225 	return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228 
229 static int __init set_tracepoint_printk(char *str)
230 {
231 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
232 		tracepoint_printk = 1;
233 	return 1;
234 }
235 __setup("tp_printk", set_tracepoint_printk);
236 
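/*
 * As a rough example, the boot parameters handled above could be combined
 * on the kernel command line like (the option values here are only samples):
 *
 *   ftrace=function ftrace_dump_on_oops=orig_cpu traceoff_on_warning
 *   alloc_snapshot trace_options=sym-offset trace_clock=global tp_printk
 */
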
237 unsigned long long ns2usecs(cycle_t nsec)
238 {
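	/* Round to the nearest microsecond: e.g. 1500 ns -> 2 us, 1499 ns -> 1 us. */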
239 	nsec += 500;
240 	do_div(nsec, 1000);
241 	return nsec;
242 }
243 
244 /* trace_flags holds trace_options default values */
245 #define TRACE_DEFAULT_FLAGS						\
246 	(FUNCTION_DEFAULT_FLAGS |					\
247 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
248 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
249 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
250 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
251 
252 /* trace_options that are only supported by global_trace */
253 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
254 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
255 
256 /* trace_flags that are default zero for instances */
257 #define ZEROED_TRACE_FLAGS \
258 	TRACE_ITER_EVENT_FORK
259 
260 /*
261  * The global_trace is the descriptor that holds the tracing
262  * buffers for the live tracing. For each CPU, it contains
263  * a linked list of pages that will store trace entries. The
264  * page descriptor of the pages in memory is used to hold
265  * the linked list by linking the lru item in the page descriptor
266  * to each of the pages in the buffer per CPU.
267  *
268  * For each active CPU there is a data field that holds the
269  * pages for the buffer for that CPU. Each CPU has the same number
270  * of pages allocated for its buffer.
271  */
272 static struct trace_array global_trace = {
273 	.trace_flags = TRACE_DEFAULT_FLAGS,
274 };
275 
276 LIST_HEAD(ftrace_trace_arrays);
277 
278 int trace_array_get(struct trace_array *this_tr)
279 {
280 	struct trace_array *tr;
281 	int ret = -ENODEV;
282 
283 	mutex_lock(&trace_types_lock);
284 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285 		if (tr == this_tr) {
286 			tr->ref++;
287 			ret = 0;
288 			break;
289 		}
290 	}
291 	mutex_unlock(&trace_types_lock);
292 
293 	return ret;
294 }
295 
296 static void __trace_array_put(struct trace_array *this_tr)
297 {
298 	WARN_ON(!this_tr->ref);
299 	this_tr->ref--;
300 }
301 
302 void trace_array_put(struct trace_array *this_tr)
303 {
304 	mutex_lock(&trace_types_lock);
305 	__trace_array_put(this_tr);
306 	mutex_unlock(&trace_types_lock);
307 }
308 
309 int call_filter_check_discard(struct trace_event_call *call, void *rec,
310 			      struct ring_buffer *buffer,
311 			      struct ring_buffer_event *event)
312 {
313 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
314 	    !filter_match_preds(call->filter, rec)) {
315 		__trace_event_discard_commit(buffer, event);
316 		return 1;
317 	}
318 
319 	return 0;
320 }
321 
322 void trace_free_pid_list(struct trace_pid_list *pid_list)
323 {
324 	vfree(pid_list->pids);
325 	kfree(pid_list);
326 }
327 
328 /**
329  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
330  * @filtered_pids: The list of pids to check
331  * @search_pid: The PID to find in @filtered_pids
332  *
333  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
334  */
335 bool
336 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
337 {
338 	/*
339 	 * If pid_max changed after filtered_pids was created, we
340 	 * by default ignore all pids greater than the previous pid_max.
341 	 */
342 	if (search_pid >= filtered_pids->pid_max)
343 		return false;
344 
345 	return test_bit(search_pid, filtered_pids->pids);
346 }
347 
348 /**
349  * trace_ignore_this_task - should a task be ignored for tracing
350  * @filtered_pids: The list of pids to check
351  * @task: The task that should be ignored if not filtered
352  *
353  * Checks if @task should be traced or not from @filtered_pids.
354  * Returns true if @task should *NOT* be traced.
355  * Returns false if @task should be traced.
356  */
357 bool
358 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
359 {
360 	/*
361 	 * Return false, because if filtered_pids does not exist,
362 	 * all pids are good to trace.
363 	 */
364 	if (!filtered_pids)
365 		return false;
366 
367 	return !trace_find_filtered_pid(filtered_pids, task->pid);
368 }
369 
370 /**
371  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
372  * @pid_list: The list to modify
373  * @self: The current task for fork or NULL for exit
374  * @task: The task to add or remove
375  *
376  * If adding a task, if @self is defined, the task is only added if @self
377  * is also included in @pid_list. This happens on fork and tasks should
378  * only be added when the parent is listed. If @self is NULL, then the
379  * @task pid will be removed from the list, which would happen on exit
380  * of a task.
381  */
382 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
383 				  struct task_struct *self,
384 				  struct task_struct *task)
385 {
386 	if (!pid_list)
387 		return;
388 
389 	/* For forks, we only add if the forking task is listed */
390 	if (self) {
391 		if (!trace_find_filtered_pid(pid_list, self->pid))
392 			return;
393 	}
394 
395 	/* Sorry, but we don't support pid_max changing after setting */
396 	if (task->pid >= pid_list->pid_max)
397 		return;
398 
399 	/* "self" is set for forks, and NULL for exits */
400 	if (self)
401 		set_bit(task->pid, pid_list->pids);
402 	else
403 		clear_bit(task->pid, pid_list->pids);
404 }
405 
406 /**
407  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
408  * @pid_list: The pid list to show
409  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
410  * @pos: The position of the file
411  *
412  * This is used by the seq_file "next" operation to iterate the pids
413  * listed in a trace_pid_list structure.
414  *
415  * Returns the pid+1 as we want to display pid of zero, but NULL would
416  * stop the iteration.
417  */
418 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
419 {
420 	unsigned long pid = (unsigned long)v;
421 
422 	(*pos)++;
423 
424 	/* pid already is +1 of the actual previous bit */
425 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
426 
427 	/* Return pid + 1 to allow zero to be represented */
428 	if (pid < pid_list->pid_max)
429 		return (void *)(pid + 1);
430 
431 	return NULL;
432 }
433 
434 /**
435  * trace_pid_start - Used for seq_file to start reading pid lists
436  * @pid_list: The pid list to show
437  * @pos: The position of the file
438  *
439  * This is used by seq_file "start" operation to start the iteration
440  * of listing pids.
441  *
442  * Returns the pid+1 as we want to display pid of zero, but NULL would
443  * stop the iteration.
444  */
445 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
446 {
447 	unsigned long pid;
448 	loff_t l = 0;
449 
450 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
451 	if (pid >= pid_list->pid_max)
452 		return NULL;
453 
454 	/* Return pid + 1 so that zero can be the exit value */
455 	for (pid++; pid && l < *pos;
456 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
457 		;
458 	return (void *)pid;
459 }
460 
461 /**
462  * trace_pid_show - show the current pid in seq_file processing
463  * @m: The seq_file structure to write into
464  * @v: A void pointer of the pid (+1) value to display
465  *
466  * Can be directly used by seq_file operations to display the current
467  * pid value.
468  */
469 int trace_pid_show(struct seq_file *m, void *v)
470 {
471 	unsigned long pid = (unsigned long)v - 1;
472 
473 	seq_printf(m, "%lu\n", pid);
474 	return 0;
475 }
476 
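/*
 * A sketch of how these helpers are meant to plug into seq_file (the
 * wrapper names and the pid list below are only placeholders):
 *
 *   static void *p_start(struct seq_file *m, loff_t *pos)
 *   {
 *           return trace_pid_start(some_pid_list, pos);
 *   }
 *
 *   static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *   {
 *           return trace_pid_next(some_pid_list, v, pos);
 *   }
 *
 *   static const struct seq_operations example_pid_sops = {
 *           .start = p_start,
 *           .next  = p_next,
 *           .stop  = p_stop,
 *           .show  = trace_pid_show,
 *   };
 */
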
477 /* 128 should be much more than enough */
478 #define PID_BUF_SIZE		127
479 
480 int trace_pid_write(struct trace_pid_list *filtered_pids,
481 		    struct trace_pid_list **new_pid_list,
482 		    const char __user *ubuf, size_t cnt)
483 {
484 	struct trace_pid_list *pid_list;
485 	struct trace_parser parser;
486 	unsigned long val;
487 	int nr_pids = 0;
488 	ssize_t read = 0;
489 	ssize_t ret = 0;
490 	loff_t pos;
491 	pid_t pid;
492 
493 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
494 		return -ENOMEM;
495 
496 	/*
497 	 * Always create a new array when the user writes new pids. The
498 	 * write is an all-or-nothing operation: if anything fails along
499 	 * the way, the new array is discarded and the current list is
500 	 * left unmodified.
501 	 */
502 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
503 	if (!pid_list)
504 		return -ENOMEM;
505 
506 	pid_list->pid_max = READ_ONCE(pid_max);
507 
508 	/* Only truncating will shrink pid_max */
509 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
510 		pid_list->pid_max = filtered_pids->pid_max;
511 
512 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
513 	if (!pid_list->pids) {
514 		kfree(pid_list);
515 		return -ENOMEM;
516 	}
517 
518 	if (filtered_pids) {
519 		/* copy the current bits to the new max */
520 		for_each_set_bit(pid, filtered_pids->pids,
521 				 filtered_pids->pid_max) {
522 			set_bit(pid, pid_list->pids);
523 			nr_pids++;
524 		}
525 	}
526 
527 	while (cnt > 0) {
528 
529 		pos = 0;
530 
531 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
532 		if (ret < 0 || !trace_parser_loaded(&parser))
533 			break;
534 
535 		read += ret;
536 		ubuf += ret;
537 		cnt -= ret;
538 
539 		parser.buffer[parser.idx] = 0;
540 
541 		ret = -EINVAL;
542 		if (kstrtoul(parser.buffer, 0, &val))
543 			break;
544 		if (val >= pid_list->pid_max)
545 			break;
546 
547 		pid = (pid_t)val;
548 
549 		set_bit(pid, pid_list->pids);
550 		nr_pids++;
551 
552 		trace_parser_clear(&parser);
553 		ret = 0;
554 	}
555 	trace_parser_put(&parser);
556 
557 	if (ret < 0) {
558 		trace_free_pid_list(pid_list);
559 		return ret;
560 	}
561 
562 	if (!nr_pids) {
563 		/* Cleared the list of pids */
564 		trace_free_pid_list(pid_list);
565 		read = ret;
566 		pid_list = NULL;
567 	}
568 
569 	*new_pid_list = pid_list;
570 
571 	return read;
572 }
573 
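/*
 * Rough user-space view of the semantics above (set_event_pid is one
 * tracefs file backed by this helper; the pids are only examples):
 *
 *   echo 123 321 > set_event_pid    build a fresh filter with pids 123, 321
 *   echo 456 >> set_event_pid       append 456 (the existing list is passed
 *                                   in as @filtered_pids and copied over)
 */
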
574 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
575 {
576 	u64 ts;
577 
578 	/* Early boot up does not have a buffer yet */
579 	if (!buf->buffer)
580 		return trace_clock_local();
581 
582 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
583 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
584 
585 	return ts;
586 }
587 
588 cycle_t ftrace_now(int cpu)
589 {
590 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
591 }
592 
593 /**
594  * tracing_is_enabled - Show if global_trace has been disabled
595  *
596  * Shows if the global trace has been enabled or not. It uses the
597  * mirror flag "buffer_disabled" to be used in fast paths such as for
598  * the irqsoff tracer. But it may be inaccurate due to races. If you
599  * need to know the accurate state, use tracing_is_on() which is a little
600  * slower, but accurate.
601  */
602 int tracing_is_enabled(void)
603 {
604 	/*
605 	 * For quick access (irqsoff uses this in fast path), just
606 	 * return the mirror variable of the state of the ring buffer.
607 	 * It's a little racy, but we don't really care.
608 	 */
609 	smp_rmb();
610 	return !global_trace.buffer_disabled;
611 }
612 
613 /*
614  * trace_buf_size is the size in bytes that is allocated
615  * for a buffer. Note, the number of bytes is always rounded
616  * to page size.
617  *
618  * This number is purposely set to a low number of 16384.
619  * If a dump on oops happens, it is much appreciated not to have
620  * to wait for all that output. In any case, this is configurable
621  * at both boot time and run time.
622  */
623 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
624 
625 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
626 
627 /* trace_types holds a link list of available tracers. */
628 static struct tracer		*trace_types __read_mostly;
629 
630 /*
631  * trace_types_lock is used to protect the trace_types list.
632  */
633 DEFINE_MUTEX(trace_types_lock);
634 
635 /*
636  * serialize the access of the ring buffer
637  *
638  * The ring buffer serializes readers, but that is only low-level
639  * protection. The validity of the events (returned by
640  * ring_buffer_peek() etc.) is not protected by the ring buffer.
641  *
642  * The content of events may become garbage if we allow other processes
643  * to consume these events concurrently:
644  *   A) the page of the consumed events may become a normal page
645  *      (not a reader page) in the ring buffer, and this page will be
646  *      rewritten by the events producer.
647  *   B) The page of the consumed events may become a page for
648  *      splice_read, and this page will be returned to the system.
649  *
650  * These primitives allow multiple processes to access different CPU
651  * ring buffers concurrently.
652  *
653  * These primitives don't distinguish read-only and read-consume access.
654  * Multiple read-only accesses are also serialized.
655  */
656 
657 #ifdef CONFIG_SMP
658 static DECLARE_RWSEM(all_cpu_access_lock);
659 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
660 
661 static inline void trace_access_lock(int cpu)
662 {
663 	if (cpu == RING_BUFFER_ALL_CPUS) {
664 		/* gain it for accessing the whole ring buffer. */
665 		down_write(&all_cpu_access_lock);
666 	} else {
667 		/* gain it for accessing a cpu ring buffer. */
668 
669 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
670 		down_read(&all_cpu_access_lock);
671 
672 		/* Secondly block other access to this @cpu ring buffer. */
673 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
674 	}
675 }
676 
677 static inline void trace_access_unlock(int cpu)
678 {
679 	if (cpu == RING_BUFFER_ALL_CPUS) {
680 		up_write(&all_cpu_access_lock);
681 	} else {
682 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
683 		up_read(&all_cpu_access_lock);
684 	}
685 }
686 
687 static inline void trace_access_lock_init(void)
688 {
689 	int cpu;
690 
691 	for_each_possible_cpu(cpu)
692 		mutex_init(&per_cpu(cpu_access_lock, cpu));
693 }
694 
695 #else
696 
697 static DEFINE_MUTEX(access_lock);
698 
699 static inline void trace_access_lock(int cpu)
700 {
701 	(void)cpu;
702 	mutex_lock(&access_lock);
703 }
704 
705 static inline void trace_access_unlock(int cpu)
706 {
707 	(void)cpu;
708 	mutex_unlock(&access_lock);
709 }
710 
711 static inline void trace_access_lock_init(void)
712 {
713 }
714 
715 #endif
716 
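/*
 * Intended usage, roughly (the reader code itself lives elsewhere in this
 * file):
 *
 *   trace_access_lock(cpu);      cpu may be RING_BUFFER_ALL_CPUS
 *   ... peek at or consume events for that cpu ...
 *   trace_access_unlock(cpu);
 */
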
717 #ifdef CONFIG_STACKTRACE
718 static void __ftrace_trace_stack(struct ring_buffer *buffer,
719 				 unsigned long flags,
720 				 int skip, int pc, struct pt_regs *regs);
721 static inline void ftrace_trace_stack(struct trace_array *tr,
722 				      struct ring_buffer *buffer,
723 				      unsigned long flags,
724 				      int skip, int pc, struct pt_regs *regs);
725 
726 #else
727 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
728 					unsigned long flags,
729 					int skip, int pc, struct pt_regs *regs)
730 {
731 }
732 static inline void ftrace_trace_stack(struct trace_array *tr,
733 				      struct ring_buffer *buffer,
734 				      unsigned long flags,
735 				      int skip, int pc, struct pt_regs *regs)
736 {
737 }
738 
739 #endif
740 
741 static void tracer_tracing_on(struct trace_array *tr)
742 {
743 	if (tr->trace_buffer.buffer)
744 		ring_buffer_record_on(tr->trace_buffer.buffer);
745 	/*
746 	 * This flag is looked at when buffers haven't been allocated
747 	 * yet, or by some tracers (like irqsoff), that just want to
748 	 * know if the ring buffer has been disabled, but it can handle
749 	 * races of where it gets disabled but we still do a record.
750 	 * As the check is in the fast path of the tracers, it is more
751 	 * important to be fast than accurate.
752 	 */
753 	tr->buffer_disabled = 0;
754 	/* Make the flag seen by readers */
755 	smp_wmb();
756 }
757 
758 /**
759  * tracing_on - enable tracing buffers
760  *
761  * This function enables tracing buffers that may have been
762  * disabled with tracing_off.
763  */
764 void tracing_on(void)
765 {
766 	tracer_tracing_on(&global_trace);
767 }
768 EXPORT_SYMBOL_GPL(tracing_on);
769 
770 /**
771  * __trace_puts - write a constant string into the trace buffer.
772  * @ip:	   The address of the caller
773  * @str:   The constant string to write
774  * @size:  The size of the string.
775  */
776 int __trace_puts(unsigned long ip, const char *str, int size)
777 {
778 	struct ring_buffer_event *event;
779 	struct ring_buffer *buffer;
780 	struct print_entry *entry;
781 	unsigned long irq_flags;
782 	int alloc;
783 	int pc;
784 
785 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
786 		return 0;
787 
788 	pc = preempt_count();
789 
790 	if (unlikely(tracing_selftest_running || tracing_disabled))
791 		return 0;
792 
793 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
794 
795 	local_save_flags(irq_flags);
796 	buffer = global_trace.trace_buffer.buffer;
797 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
798 					  irq_flags, pc);
799 	if (!event)
800 		return 0;
801 
802 	entry = ring_buffer_event_data(event);
803 	entry->ip = ip;
804 
805 	memcpy(&entry->buf, str, size);
806 
807 	/* Add a newline if necessary */
808 	if (entry->buf[size - 1] != '\n') {
809 		entry->buf[size] = '\n';
810 		entry->buf[size + 1] = '\0';
811 	} else
812 		entry->buf[size] = '\0';
813 
814 	__buffer_unlock_commit(buffer, event);
815 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
816 
817 	return size;
818 }
819 EXPORT_SYMBOL_GPL(__trace_puts);
820 
821 /**
822  * __trace_bputs - write the pointer to a constant string into trace buffer
823  * @ip:	   The address of the caller
824  * @str:   The constant string to write to the buffer to
825  */
826 int __trace_bputs(unsigned long ip, const char *str)
827 {
828 	struct ring_buffer_event *event;
829 	struct ring_buffer *buffer;
830 	struct bputs_entry *entry;
831 	unsigned long irq_flags;
832 	int size = sizeof(struct bputs_entry);
833 	int pc;
834 
835 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
836 		return 0;
837 
838 	pc = preempt_count();
839 
840 	if (unlikely(tracing_selftest_running || tracing_disabled))
841 		return 0;
842 
843 	local_save_flags(irq_flags);
844 	buffer = global_trace.trace_buffer.buffer;
845 	event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
846 					  irq_flags, pc);
847 	if (!event)
848 		return 0;
849 
850 	entry = ring_buffer_event_data(event);
851 	entry->ip			= ip;
852 	entry->str			= str;
853 
854 	__buffer_unlock_commit(buffer, event);
855 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
856 
857 	return 1;
858 }
859 EXPORT_SYMBOL_GPL(__trace_bputs);
860 
861 #ifdef CONFIG_TRACER_SNAPSHOT
862 /**
863  * trace_snapshot - take a snapshot of the current buffer.
864  *
865  * This causes a swap between the snapshot buffer and the current live
866  * tracing buffer. You can use this to take snapshots of the live
867  * trace when some condition is triggered, but continue to trace.
868  *
869  * Note, make sure to allocate the snapshot with either
870  * a tracing_snapshot_alloc(), or by doing it manually
871  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
872  *
873  * If the snapshot buffer is not allocated, it will stop tracing.
874  * Basically making a permanent snapshot.
875  */
876 void tracing_snapshot(void)
877 {
878 	struct trace_array *tr = &global_trace;
879 	struct tracer *tracer = tr->current_trace;
880 	unsigned long flags;
881 
882 	if (in_nmi()) {
883 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
884 		internal_trace_puts("*** snapshot is being ignored        ***\n");
885 		return;
886 	}
887 
888 	if (!tr->allocated_snapshot) {
889 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
890 		internal_trace_puts("*** stopping trace here!   ***\n");
891 		tracing_off();
892 		return;
893 	}
894 
895 	/* Note, snapshot can not be used when the tracer uses it */
896 	if (tracer->use_max_tr) {
897 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
898 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
899 		return;
900 	}
901 
902 	local_irq_save(flags);
903 	update_max_tr(tr, current, smp_processor_id());
904 	local_irq_restore(flags);
905 }
906 EXPORT_SYMBOL_GPL(tracing_snapshot);
907 
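/*
 * A sketch of typical in-kernel use (the condition below is hypothetical):
 *
 *   if (interesting_condition())
 *           tracing_snapshot();    preserve the trace so far while live
 *                                  tracing keeps running
 *
 * The spare buffer must already exist, either via tracing_snapshot_alloc()
 * or "echo 1 > /sys/kernel/debug/tracing/snapshot" as noted above.
 */
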
908 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
909 					struct trace_buffer *size_buf, int cpu_id);
910 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
911 
912 static int alloc_snapshot(struct trace_array *tr)
913 {
914 	int ret;
915 
916 	if (!tr->allocated_snapshot) {
917 
918 		/* allocate spare buffer */
919 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
920 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
921 		if (ret < 0)
922 			return ret;
923 
924 		tr->allocated_snapshot = true;
925 	}
926 
927 	return 0;
928 }
929 
930 static void free_snapshot(struct trace_array *tr)
931 {
932 	/*
933 	 * We don't free the ring buffer; instead, we resize it because
934 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
935 	 * we want to preserve it.
936 	 */
937 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
938 	set_buffer_entries(&tr->max_buffer, 1);
939 	tracing_reset_online_cpus(&tr->max_buffer);
940 	tr->allocated_snapshot = false;
941 }
942 
943 /**
944  * tracing_alloc_snapshot - allocate snapshot buffer.
945  *
946  * This only allocates the snapshot buffer if it isn't already
947  * allocated - it doesn't also take a snapshot.
948  *
949  * This is meant to be used in cases where the snapshot buffer needs
950  * to be set up for events that can't sleep but need to be able to
951  * trigger a snapshot.
952  */
953 int tracing_alloc_snapshot(void)
954 {
955 	struct trace_array *tr = &global_trace;
956 	int ret;
957 
958 	ret = alloc_snapshot(tr);
959 	WARN_ON(ret < 0);
960 
961 	return ret;
962 }
963 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
964 
965 /**
966  * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
967  *
968  * This is similar to trace_snapshot(), but it will allocate the
969  * snapshot buffer if it isn't already allocated. Use this only
970  * where it is safe to sleep, as the allocation may sleep.
971  *
972  * This causes a swap between the snapshot buffer and the current live
973  * tracing buffer. You can use this to take snapshots of the live
974  * trace when some condition is triggered, but continue to trace.
975  */
976 void tracing_snapshot_alloc(void)
977 {
978 	int ret;
979 
980 	ret = tracing_alloc_snapshot();
981 	if (ret < 0)
982 		return;
983 
984 	tracing_snapshot();
985 }
986 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
987 #else
988 void tracing_snapshot(void)
989 {
990 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
991 }
992 EXPORT_SYMBOL_GPL(tracing_snapshot);
993 int tracing_alloc_snapshot(void)
994 {
995 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
996 	return -ENODEV;
997 }
998 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
999 void tracing_snapshot_alloc(void)
1000 {
1001 	/* Give warning */
1002 	tracing_snapshot();
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1005 #endif /* CONFIG_TRACER_SNAPSHOT */
1006 
1007 static void tracer_tracing_off(struct trace_array *tr)
1008 {
1009 	if (tr->trace_buffer.buffer)
1010 		ring_buffer_record_off(tr->trace_buffer.buffer);
1011 	/*
1012 	 * This flag is looked at when buffers haven't been allocated
1013 	 * yet, or by some tracers (like irqsoff), that just want to
1014 	 * know if the ring buffer has been disabled, but it can handle
1015 	 * races of where it gets disabled but we still do a record.
1016 	 * As the check is in the fast path of the tracers, it is more
1017 	 * important to be fast than accurate.
1018 	 */
1019 	tr->buffer_disabled = 1;
1020 	/* Make the flag seen by readers */
1021 	smp_wmb();
1022 }
1023 
1024 /**
1025  * tracing_off - turn off tracing buffers
1026  *
1027  * This function stops the tracing buffers from recording data.
1028  * It does not disable any overhead the tracers themselves may
1029  * be causing. This function simply causes all recording to
1030  * the ring buffers to fail.
1031  */
1032 void tracing_off(void)
1033 {
1034 	tracer_tracing_off(&global_trace);
1035 }
1036 EXPORT_SYMBOL_GPL(tracing_off);
1037 
1038 void disable_trace_on_warning(void)
1039 {
1040 	if (__disable_trace_on_warning)
1041 		tracing_off();
1042 }
1043 
1044 /**
1045  * tracer_tracing_is_on - show real state of ring buffer enabled
1046  * @tr: the trace array whose ring buffer state is reported
1047  *
1048  * Shows real state of the ring buffer if it is enabled or not.
1049  */
1050 int tracer_tracing_is_on(struct trace_array *tr)
1051 {
1052 	if (tr->trace_buffer.buffer)
1053 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1054 	return !tr->buffer_disabled;
1055 }
1056 
1057 /**
1058  * tracing_is_on - show state of ring buffers enabled
1059  */
1060 int tracing_is_on(void)
1061 {
1062 	return tracer_tracing_is_on(&global_trace);
1063 }
1064 EXPORT_SYMBOL_GPL(tracing_is_on);
1065 
1066 static int __init set_buf_size(char *str)
1067 {
1068 	unsigned long buf_size;
1069 
1070 	if (!str)
1071 		return 0;
1072 	buf_size = memparse(str, &str);
1073 	/* nr_entries can not be zero */
1074 	if (buf_size == 0)
1075 		return 0;
1076 	trace_buf_size = buf_size;
1077 	return 1;
1078 }
1079 __setup("trace_buf_size=", set_buf_size);
1080 
1081 static int __init set_tracing_thresh(char *str)
1082 {
1083 	unsigned long threshold;
1084 	int ret;
1085 
1086 	if (!str)
1087 		return 0;
1088 	ret = kstrtoul(str, 0, &threshold);
1089 	if (ret < 0)
1090 		return 0;
1091 	tracing_thresh = threshold * 1000;
1092 	return 1;
1093 }
1094 __setup("tracing_thresh=", set_tracing_thresh);
1095 
1096 unsigned long nsecs_to_usecs(unsigned long nsecs)
1097 {
1098 	return nsecs / 1000;
1099 }
1100 
1101 /*
1102  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1103  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1104  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1105  * of strings in the order that the enums were defined.
1106  */
1107 #undef C
1108 #define C(a, b) b
1109 
1110 /* These must match the bit positions in trace_iterator_flags */
1111 static const char *trace_options[] = {
1112 	TRACE_FLAGS
1113 	NULL
1114 };
1115 
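/*
 * For example, if TRACE_FLAGS contained C(PRINT_PARENT, "print-parent") and
 * C(SYM_OFFSET, "sym-offset"), the array above would expand to
 * { "print-parent", "sym-offset", NULL } (the names here are illustrative).
 */
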
1116 static struct {
1117 	u64 (*func)(void);
1118 	const char *name;
1119 	int in_ns;		/* is this clock in nanoseconds? */
1120 } trace_clocks[] = {
1121 	{ trace_clock_local,		"local",	1 },
1122 	{ trace_clock_global,		"global",	1 },
1123 	{ trace_clock_counter,		"counter",	0 },
1124 	{ trace_clock_jiffies,		"uptime",	0 },
1125 	{ trace_clock,			"perf",		1 },
1126 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1127 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1128 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1129 	ARCH_TRACE_CLOCKS
1130 };
1131 
1132 /*
1133  * trace_parser_get_init - gets the buffer for trace parser
1134  */
1135 int trace_parser_get_init(struct trace_parser *parser, int size)
1136 {
1137 	memset(parser, 0, sizeof(*parser));
1138 
1139 	parser->buffer = kmalloc(size, GFP_KERNEL);
1140 	if (!parser->buffer)
1141 		return 1;
1142 
1143 	parser->size = size;
1144 	return 0;
1145 }
1146 
1147 /*
1148  * trace_parser_put - frees the buffer for trace parser
1149  */
1150 void trace_parser_put(struct trace_parser *parser)
1151 {
1152 	kfree(parser->buffer);
1153 }
1154 
1155 /*
1156  * trace_get_user - reads the user input string separated by space
1157  * (matched by isspace(ch))
1158  *
1159  * For each string found the 'struct trace_parser' is updated,
1160  * and the function returns.
1161  *
1162  * Returns number of bytes read.
1163  *
1164  * See kernel/trace/trace.h for 'struct trace_parser' details.
1165  */
1166 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1167 	size_t cnt, loff_t *ppos)
1168 {
1169 	char ch;
1170 	size_t read = 0;
1171 	ssize_t ret;
1172 
1173 	if (!*ppos)
1174 		trace_parser_clear(parser);
1175 
1176 	ret = get_user(ch, ubuf++);
1177 	if (ret)
1178 		goto out;
1179 
1180 	read++;
1181 	cnt--;
1182 
1183 	/*
1184 	 * The parser is not finished with the last write,
1185 	 * continue reading the user input without skipping spaces.
1186 	 */
1187 	if (!parser->cont) {
1188 		/* skip white space */
1189 		while (cnt && isspace(ch)) {
1190 			ret = get_user(ch, ubuf++);
1191 			if (ret)
1192 				goto out;
1193 			read++;
1194 			cnt--;
1195 		}
1196 
1197 		/* only spaces were written */
1198 		if (isspace(ch)) {
1199 			*ppos += read;
1200 			ret = read;
1201 			goto out;
1202 		}
1203 
1204 		parser->idx = 0;
1205 	}
1206 
1207 	/* read the non-space input */
1208 	while (cnt && !isspace(ch)) {
1209 		if (parser->idx < parser->size - 1)
1210 			parser->buffer[parser->idx++] = ch;
1211 		else {
1212 			ret = -EINVAL;
1213 			goto out;
1214 		}
1215 		ret = get_user(ch, ubuf++);
1216 		if (ret)
1217 			goto out;
1218 		read++;
1219 		cnt--;
1220 	}
1221 
1222 	/* We either got finished input or we have to wait for another call. */
1223 	if (isspace(ch)) {
1224 		parser->buffer[parser->idx] = 0;
1225 		parser->cont = false;
1226 	} else if (parser->idx < parser->size - 1) {
1227 		parser->cont = true;
1228 		parser->buffer[parser->idx++] = ch;
1229 	} else {
1230 		ret = -EINVAL;
1231 		goto out;
1232 	}
1233 
1234 	*ppos += read;
1235 	ret = read;
1236 
1237 out:
1238 	return ret;
1239 }
1240 
1241 /* TODO add a seq_buf_to_buffer() */
1242 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1243 {
1244 	int len;
1245 
1246 	if (trace_seq_used(s) <= s->seq.readpos)
1247 		return -EBUSY;
1248 
1249 	len = trace_seq_used(s) - s->seq.readpos;
1250 	if (cnt > len)
1251 		cnt = len;
1252 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1253 
1254 	s->seq.readpos += cnt;
1255 	return cnt;
1256 }
1257 
1258 unsigned long __read_mostly	tracing_thresh;
1259 
1260 #ifdef CONFIG_TRACER_MAX_TRACE
1261 /*
1262  * Copy the new maximum trace into the separate maximum-trace
1263  * structure. (this way the maximum trace is permanently saved,
1264  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1265  */
1266 static void
1267 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1268 {
1269 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1270 	struct trace_buffer *max_buf = &tr->max_buffer;
1271 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1272 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1273 
1274 	max_buf->cpu = cpu;
1275 	max_buf->time_start = data->preempt_timestamp;
1276 
1277 	max_data->saved_latency = tr->max_latency;
1278 	max_data->critical_start = data->critical_start;
1279 	max_data->critical_end = data->critical_end;
1280 
1281 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1282 	max_data->pid = tsk->pid;
1283 	/*
1284 	 * If tsk == current, then use current_uid(), as that does not use
1285 	 * RCU. The irq tracer can be called out of RCU scope.
1286 	 */
1287 	if (tsk == current)
1288 		max_data->uid = current_uid();
1289 	else
1290 		max_data->uid = task_uid(tsk);
1291 
1292 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1293 	max_data->policy = tsk->policy;
1294 	max_data->rt_priority = tsk->rt_priority;
1295 
1296 	/* record this tasks comm */
1297 	tracing_record_cmdline(tsk);
1298 }
1299 
1300 /**
1301  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1302  * @tr: tracer
1303  * @tsk: the task with the latency
1304  * @cpu: The cpu that initiated the trace.
1305  *
1306  * Flip the buffers between the @tr and the max_tr and record information
1307  * about which task was the cause of this latency.
1308  */
1309 void
1310 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1311 {
1312 	struct ring_buffer *buf;
1313 
1314 	if (tr->stop_count)
1315 		return;
1316 
1317 	WARN_ON_ONCE(!irqs_disabled());
1318 
1319 	if (!tr->allocated_snapshot) {
1320 		/* Only the nop tracer should hit this when disabling */
1321 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1322 		return;
1323 	}
1324 
1325 	arch_spin_lock(&tr->max_lock);
1326 
1327 	buf = tr->trace_buffer.buffer;
1328 	tr->trace_buffer.buffer = tr->max_buffer.buffer;
1329 	tr->max_buffer.buffer = buf;
1330 
1331 	__update_max_tr(tr, tsk, cpu);
1332 	arch_spin_unlock(&tr->max_lock);
1333 }
1334 
1335 /**
1336  * update_max_tr_single - only copy one trace over, and reset the rest
1337  * @tr: tracer
1338  * @tsk: task with the latency
1339  * @cpu: the cpu of the buffer to copy.
1340  *
1341  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1342  */
1343 void
1344 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1345 {
1346 	int ret;
1347 
1348 	if (tr->stop_count)
1349 		return;
1350 
1351 	WARN_ON_ONCE(!irqs_disabled());
1352 	if (!tr->allocated_snapshot) {
1353 		/* Only the nop tracer should hit this when disabling */
1354 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1355 		return;
1356 	}
1357 
1358 	arch_spin_lock(&tr->max_lock);
1359 
1360 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1361 
1362 	if (ret == -EBUSY) {
1363 		/*
1364 		 * We failed to swap the buffer due to a commit taking
1365 		 * place on this CPU. We fail to record, but we reset
1366 		 * the max trace buffer (no one writes directly to it)
1367 		 * and flag that it failed.
1368 		 */
1369 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1370 			"Failed to swap buffers due to commit in progress\n");
1371 	}
1372 
1373 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1374 
1375 	__update_max_tr(tr, tsk, cpu);
1376 	arch_spin_unlock(&tr->max_lock);
1377 }
1378 #endif /* CONFIG_TRACER_MAX_TRACE */
1379 
1380 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1381 {
1382 	/* Iterators are static, they should be filled or empty */
1383 	if (trace_buffer_iter(iter, iter->cpu_file))
1384 		return 0;
1385 
1386 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1387 				full);
1388 }
1389 
1390 #ifdef CONFIG_FTRACE_STARTUP_TEST
1391 static int run_tracer_selftest(struct tracer *type)
1392 {
1393 	struct trace_array *tr = &global_trace;
1394 	struct tracer *saved_tracer = tr->current_trace;
1395 	int ret;
1396 
1397 	if (!type->selftest || tracing_selftest_disabled)
1398 		return 0;
1399 
1400 	/*
1401 	 * Run a selftest on this tracer.
1402 	 * Here we reset the trace buffer, and set the current
1403 	 * tracer to be this tracer. The tracer can then run some
1404 	 * internal tracing to verify that everything is in order.
1405 	 * If we fail, we do not register this tracer.
1406 	 */
1407 	tracing_reset_online_cpus(&tr->trace_buffer);
1408 
1409 	tr->current_trace = type;
1410 
1411 #ifdef CONFIG_TRACER_MAX_TRACE
1412 	if (type->use_max_tr) {
1413 		/* If we expanded the buffers, make sure the max is expanded too */
1414 		if (ring_buffer_expanded)
1415 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1416 					   RING_BUFFER_ALL_CPUS);
1417 		tr->allocated_snapshot = true;
1418 	}
1419 #endif
1420 
1421 	/* the test is responsible for initializing and enabling */
1422 	pr_info("Testing tracer %s: ", type->name);
1423 	ret = type->selftest(type, tr);
1424 	/* the test is responsible for resetting too */
1425 	tr->current_trace = saved_tracer;
1426 	if (ret) {
1427 		printk(KERN_CONT "FAILED!\n");
1428 		/* Add the warning after printing 'FAILED' */
1429 		WARN_ON(1);
1430 		return -1;
1431 	}
1432 	/* Only reset on passing, to avoid touching corrupted buffers */
1433 	tracing_reset_online_cpus(&tr->trace_buffer);
1434 
1435 #ifdef CONFIG_TRACER_MAX_TRACE
1436 	if (type->use_max_tr) {
1437 		tr->allocated_snapshot = false;
1438 
1439 		/* Shrink the max buffer again */
1440 		if (ring_buffer_expanded)
1441 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1442 					   RING_BUFFER_ALL_CPUS);
1443 	}
1444 #endif
1445 
1446 	printk(KERN_CONT "PASSED\n");
1447 	return 0;
1448 }
1449 #else
1450 static inline int run_tracer_selftest(struct tracer *type)
1451 {
1452 	return 0;
1453 }
1454 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1455 
1456 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1457 
1458 static void __init apply_trace_boot_options(void);
1459 
1460 /**
1461  * register_tracer - register a tracer with the ftrace system.
1462  * @type: the plugin for the tracer
1463  *
1464  * Register a new plugin tracer.
1465  */
1466 int __init register_tracer(struct tracer *type)
1467 {
1468 	struct tracer *t;
1469 	int ret = 0;
1470 
1471 	if (!type->name) {
1472 		pr_info("Tracer must have a name\n");
1473 		return -1;
1474 	}
1475 
1476 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1477 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1478 		return -1;
1479 	}
1480 
1481 	mutex_lock(&trace_types_lock);
1482 
1483 	tracing_selftest_running = true;
1484 
1485 	for (t = trace_types; t; t = t->next) {
1486 		if (strcmp(type->name, t->name) == 0) {
1487 			/* already found */
1488 			pr_info("Tracer %s already registered\n",
1489 				type->name);
1490 			ret = -1;
1491 			goto out;
1492 		}
1493 	}
1494 
1495 	if (!type->set_flag)
1496 		type->set_flag = &dummy_set_flag;
1497 	if (!type->flags) {
1498 		/* allocate a dummy tracer_flags */
1499 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1500 		if (!type->flags) {
1501 			ret = -ENOMEM;
1502 			goto out;
1503 		}
1504 		type->flags->val = 0;
1505 		type->flags->opts = dummy_tracer_opt;
1506 	} else
1507 		if (!type->flags->opts)
1508 			type->flags->opts = dummy_tracer_opt;
1509 
1510 	/* store the tracer for __set_tracer_option */
1511 	type->flags->trace = type;
1512 
1513 	ret = run_tracer_selftest(type);
1514 	if (ret < 0)
1515 		goto out;
1516 
1517 	type->next = trace_types;
1518 	trace_types = type;
1519 	add_tracer_options(&global_trace, type);
1520 
1521  out:
1522 	tracing_selftest_running = false;
1523 	mutex_unlock(&trace_types_lock);
1524 
1525 	if (ret || !default_bootup_tracer)
1526 		goto out_unlock;
1527 
1528 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1529 		goto out_unlock;
1530 
1531 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1532 	/* Do we want this tracer to start on bootup? */
1533 	tracing_set_tracer(&global_trace, type->name);
1534 	default_bootup_tracer = NULL;
1535 
1536 	apply_trace_boot_options();
1537 
1538 	/* disable other selftests, since this will break them. */
1539 	tracing_selftest_disabled = true;
1540 #ifdef CONFIG_FTRACE_STARTUP_TEST
1541 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1542 	       type->name);
1543 #endif
1544 
1545  out_unlock:
1546 	return ret;
1547 }
1548 
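/*
 * A minimal sketch of a tracer registration (the tracer and its callbacks
 * here are hypothetical, not ones defined in this file):
 *
 *   static struct tracer example_tracer __read_mostly = {
 *           .name  = "example",
 *           .init  = example_tracer_init,
 *           .reset = example_tracer_reset,
 *   };
 *
 *   static __init int init_example_tracer(void)
 *   {
 *           return register_tracer(&example_tracer);
 *   }
 *   core_initcall(init_example_tracer);
 */
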
1549 void tracing_reset(struct trace_buffer *buf, int cpu)
1550 {
1551 	struct ring_buffer *buffer = buf->buffer;
1552 
1553 	if (!buffer)
1554 		return;
1555 
1556 	ring_buffer_record_disable(buffer);
1557 
1558 	/* Make sure all commits have finished */
1559 	synchronize_sched();
1560 	ring_buffer_reset_cpu(buffer, cpu);
1561 
1562 	ring_buffer_record_enable(buffer);
1563 }
1564 
1565 void tracing_reset_online_cpus(struct trace_buffer *buf)
1566 {
1567 	struct ring_buffer *buffer = buf->buffer;
1568 	int cpu;
1569 
1570 	if (!buffer)
1571 		return;
1572 
1573 	ring_buffer_record_disable(buffer);
1574 
1575 	/* Make sure all commits have finished */
1576 	synchronize_sched();
1577 
1578 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1579 
1580 	for_each_online_cpu(cpu)
1581 		ring_buffer_reset_cpu(buffer, cpu);
1582 
1583 	ring_buffer_record_enable(buffer);
1584 }
1585 
1586 /* Must have trace_types_lock held */
1587 void tracing_reset_all_online_cpus(void)
1588 {
1589 	struct trace_array *tr;
1590 
1591 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1592 		tracing_reset_online_cpus(&tr->trace_buffer);
1593 #ifdef CONFIG_TRACER_MAX_TRACE
1594 		tracing_reset_online_cpus(&tr->max_buffer);
1595 #endif
1596 	}
1597 }
1598 
1599 #define SAVED_CMDLINES_DEFAULT 128
1600 #define NO_CMDLINE_MAP UINT_MAX
1601 static unsigned saved_tgids[SAVED_CMDLINES_DEFAULT];
1602 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1603 struct saved_cmdlines_buffer {
1604 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1605 	unsigned *map_cmdline_to_pid;
1606 	unsigned cmdline_num;
1607 	int cmdline_idx;
1608 	char *saved_cmdlines;
1609 };
1610 static struct saved_cmdlines_buffer *savedcmd;
1611 
1612 /* temporarily disable recording */
1613 static atomic_t trace_record_cmdline_disabled __read_mostly;
1614 
1615 static inline char *get_saved_cmdlines(int idx)
1616 {
1617 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1618 }
1619 
1620 static inline void set_cmdline(int idx, const char *cmdline)
1621 {
1622 	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1623 }
1624 
1625 static int allocate_cmdlines_buffer(unsigned int val,
1626 				    struct saved_cmdlines_buffer *s)
1627 {
1628 	s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1629 					GFP_KERNEL);
1630 	if (!s->map_cmdline_to_pid)
1631 		return -ENOMEM;
1632 
1633 	s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1634 	if (!s->saved_cmdlines) {
1635 		kfree(s->map_cmdline_to_pid);
1636 		return -ENOMEM;
1637 	}
1638 
1639 	s->cmdline_idx = 0;
1640 	s->cmdline_num = val;
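	/*
	 * memset() only uses the low byte of NO_CMDLINE_MAP (UINT_MAX),
	 * i.e. 0xff, but filling with 0xff bytes still leaves UINT_MAX in
	 * every slot, which is what the cmdline lookups test against.
	 */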
1641 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1642 	       sizeof(s->map_pid_to_cmdline));
1643 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1644 	       val * sizeof(*s->map_cmdline_to_pid));
1645 
1646 	return 0;
1647 }
1648 
1649 static int trace_create_savedcmd(void)
1650 {
1651 	int ret;
1652 
1653 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1654 	if (!savedcmd)
1655 		return -ENOMEM;
1656 
1657 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1658 	if (ret < 0) {
1659 		kfree(savedcmd);
1660 		savedcmd = NULL;
1661 		return -ENOMEM;
1662 	}
1663 
1664 	return 0;
1665 }
1666 
1667 int is_tracing_stopped(void)
1668 {
1669 	return global_trace.stop_count;
1670 }
1671 
1672 /**
1673  * tracing_start - quick start of the tracer
1674  *
1675  * If tracing is enabled but was stopped by tracing_stop,
1676  * this will start the tracer back up.
1677  */
1678 void tracing_start(void)
1679 {
1680 	struct ring_buffer *buffer;
1681 	unsigned long flags;
1682 
1683 	if (tracing_disabled)
1684 		return;
1685 
1686 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1687 	if (--global_trace.stop_count) {
1688 		if (global_trace.stop_count < 0) {
1689 			/* Someone screwed up their debugging */
1690 			WARN_ON_ONCE(1);
1691 			global_trace.stop_count = 0;
1692 		}
1693 		goto out;
1694 	}
1695 
1696 	/* Prevent the buffers from switching */
1697 	arch_spin_lock(&global_trace.max_lock);
1698 
1699 	buffer = global_trace.trace_buffer.buffer;
1700 	if (buffer)
1701 		ring_buffer_record_enable(buffer);
1702 
1703 #ifdef CONFIG_TRACER_MAX_TRACE
1704 	buffer = global_trace.max_buffer.buffer;
1705 	if (buffer)
1706 		ring_buffer_record_enable(buffer);
1707 #endif
1708 
1709 	arch_spin_unlock(&global_trace.max_lock);
1710 
1711  out:
1712 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1713 }
1714 
1715 static void tracing_start_tr(struct trace_array *tr)
1716 {
1717 	struct ring_buffer *buffer;
1718 	unsigned long flags;
1719 
1720 	if (tracing_disabled)
1721 		return;
1722 
1723 	/* If global, we need to also start the max tracer */
1724 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1725 		return tracing_start();
1726 
1727 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1728 
1729 	if (--tr->stop_count) {
1730 		if (tr->stop_count < 0) {
1731 			/* Someone screwed up their debugging */
1732 			WARN_ON_ONCE(1);
1733 			tr->stop_count = 0;
1734 		}
1735 		goto out;
1736 	}
1737 
1738 	buffer = tr->trace_buffer.buffer;
1739 	if (buffer)
1740 		ring_buffer_record_enable(buffer);
1741 
1742  out:
1743 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1744 }
1745 
1746 /**
1747  * tracing_stop - quick stop of the tracer
1748  *
1749  * Light weight way to stop tracing. Use in conjunction with
1750  * tracing_start.
1751  */
1752 void tracing_stop(void)
1753 {
1754 	struct ring_buffer *buffer;
1755 	unsigned long flags;
1756 
1757 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1758 	if (global_trace.stop_count++)
1759 		goto out;
1760 
1761 	/* Prevent the buffers from switching */
1762 	arch_spin_lock(&global_trace.max_lock);
1763 
1764 	buffer = global_trace.trace_buffer.buffer;
1765 	if (buffer)
1766 		ring_buffer_record_disable(buffer);
1767 
1768 #ifdef CONFIG_TRACER_MAX_TRACE
1769 	buffer = global_trace.max_buffer.buffer;
1770 	if (buffer)
1771 		ring_buffer_record_disable(buffer);
1772 #endif
1773 
1774 	arch_spin_unlock(&global_trace.max_lock);
1775 
1776  out:
1777 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1778 }
1779 
1780 static void tracing_stop_tr(struct trace_array *tr)
1781 {
1782 	struct ring_buffer *buffer;
1783 	unsigned long flags;
1784 
1785 	/* If global, we need to also stop the max tracer */
1786 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1787 		return tracing_stop();
1788 
1789 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1790 	if (tr->stop_count++)
1791 		goto out;
1792 
1793 	buffer = tr->trace_buffer.buffer;
1794 	if (buffer)
1795 		ring_buffer_record_disable(buffer);
1796 
1797  out:
1798 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1799 }
1800 
1801 void trace_stop_cmdline_recording(void);
1802 
1803 static int trace_save_cmdline(struct task_struct *tsk)
1804 {
1805 	unsigned pid, idx;
1806 
1807 	if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1808 		return 0;
1809 
1810 	/*
1811 	 * It's not the end of the world if we don't get
1812 	 * the lock, but we also don't want to spin
1813 	 * nor do we want to disable interrupts,
1814 	 * so if we miss here, then better luck next time.
1815 	 */
1816 	if (!arch_spin_trylock(&trace_cmdline_lock))
1817 		return 0;
1818 
1819 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1820 	if (idx == NO_CMDLINE_MAP) {
1821 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1822 
1823 		/*
1824 		 * Check whether the cmdline buffer at idx has a pid
1825 		 * mapped. We are going to overwrite that entry so we
1826 		 * need to clear the map_pid_to_cmdline. Otherwise we
1827 		 * would read the new comm for the old pid.
1828 		 */
1829 		pid = savedcmd->map_cmdline_to_pid[idx];
1830 		if (pid != NO_CMDLINE_MAP)
1831 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1832 
1833 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1834 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1835 
1836 		savedcmd->cmdline_idx = idx;
1837 	}
1838 
1839 	set_cmdline(idx, tsk->comm);
1840 	saved_tgids[idx] = tsk->tgid;
1841 	arch_spin_unlock(&trace_cmdline_lock);
1842 
1843 	return 1;
1844 }
1845 
1846 static void __trace_find_cmdline(int pid, char comm[])
1847 {
1848 	unsigned map;
1849 
1850 	if (!pid) {
1851 		strcpy(comm, "<idle>");
1852 		return;
1853 	}
1854 
1855 	if (WARN_ON_ONCE(pid < 0)) {
1856 		strcpy(comm, "<XXX>");
1857 		return;
1858 	}
1859 
1860 	if (pid > PID_MAX_DEFAULT) {
1861 		strcpy(comm, "<...>");
1862 		return;
1863 	}
1864 
1865 	map = savedcmd->map_pid_to_cmdline[pid];
1866 	if (map != NO_CMDLINE_MAP)
1867 		strcpy(comm, get_saved_cmdlines(map));
1868 	else
1869 		strcpy(comm, "<...>");
1870 }
1871 
1872 void trace_find_cmdline(int pid, char comm[])
1873 {
1874 	preempt_disable();
1875 	arch_spin_lock(&trace_cmdline_lock);
1876 
1877 	__trace_find_cmdline(pid, comm);
1878 
1879 	arch_spin_unlock(&trace_cmdline_lock);
1880 	preempt_enable();
1881 }
1882 
1883 int trace_find_tgid(int pid)
1884 {
1885 	unsigned map;
1886 	int tgid;
1887 
1888 	preempt_disable();
1889 	arch_spin_lock(&trace_cmdline_lock);
1890 	map = savedcmd->map_pid_to_cmdline[pid];
1891 	if (map != NO_CMDLINE_MAP)
1892 		tgid = saved_tgids[map];
1893 	else
1894 		tgid = -1;
1895 
1896 	arch_spin_unlock(&trace_cmdline_lock);
1897 	preempt_enable();
1898 
1899 	return tgid;
1900 }
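
/*
 * Illustrative sketch, not part of this file: resolving a pid recorded in
 * an event back to its saved comm and tgid with the two helpers above.
 * The function below is hypothetical; the comm buffer must hold at least
 * TASK_COMM_LEN bytes, and "<...>" / -1 come back for pids that were
 * never saved.
 */
static void example_print_saved_task(int pid)
{
	char comm[TASK_COMM_LEN];
	int tgid;

	trace_find_cmdline(pid, comm);
	tgid = trace_find_tgid(pid);

	pr_info("pid %d: comm=%s tgid=%d\n", pid, comm, tgid);
}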
1901 
1902 void tracing_record_cmdline(struct task_struct *tsk)
1903 {
1904 	if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1905 		return;
1906 
1907 	if (!__this_cpu_read(trace_cmdline_save))
1908 		return;
1909 
1910 	if (trace_save_cmdline(tsk))
1911 		__this_cpu_write(trace_cmdline_save, false);
1912 }
1913 
1914 void
1915 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1916 			     int pc)
1917 {
1918 	struct task_struct *tsk = current;
1919 
1920 	entry->preempt_count		= pc & 0xff;
1921 	entry->pid			= (tsk) ? tsk->pid : 0;
1922 	entry->flags =
1923 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1924 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1925 #else
1926 		TRACE_FLAG_IRQS_NOSUPPORT |
1927 #endif
1928 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1929 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1930 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
1931 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1932 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1933 }
1934 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1935 
1936 static __always_inline void
1937 trace_event_setup(struct ring_buffer_event *event,
1938 		  int type, unsigned long flags, int pc)
1939 {
1940 	struct trace_entry *ent = ring_buffer_event_data(event);
1941 
1942 	tracing_generic_entry_update(ent, flags, pc);
1943 	ent->type = type;
1944 }
1945 
1946 struct ring_buffer_event *
1947 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1948 			  int type,
1949 			  unsigned long len,
1950 			  unsigned long flags, int pc)
1951 {
1952 	struct ring_buffer_event *event;
1953 
1954 	event = ring_buffer_lock_reserve(buffer, len);
1955 	if (event != NULL)
1956 		trace_event_setup(event, type, flags, pc);
1957 
1958 	return event;
1959 }
1960 
1961 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1962 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1963 static int trace_buffered_event_ref;
1964 
1965 /**
1966  * trace_buffered_event_enable - enable buffering events
1967  *
1968  * When events are being filtered, it is quicker to use a temporary
1969  * buffer to write the event data into if there's a likely chance
1970  * that it will not be committed. The discard of the ring buffer
1971  * is not as fast as committing, and is much slower than copying
1972  * a commit.
1973  *
1974  * When an event is to be filtered, allocate per cpu buffers to
1975  * write the event data into, and if the event is filtered and discarded
1976  * it is simply dropped, otherwise, the entire data is to be committed
1977  * in one shot.
1978  */
1979 void trace_buffered_event_enable(void)
1980 {
1981 	struct ring_buffer_event *event;
1982 	struct page *page;
1983 	int cpu;
1984 
1985 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1986 
1987 	if (trace_buffered_event_ref++)
1988 		return;
1989 
1990 	for_each_tracing_cpu(cpu) {
1991 		page = alloc_pages_node(cpu_to_node(cpu),
1992 					GFP_KERNEL | __GFP_NORETRY, 0);
1993 		if (!page)
1994 			goto failed;
1995 
1996 		event = page_address(page);
1997 		memset(event, 0, sizeof(*event));
1998 
1999 		per_cpu(trace_buffered_event, cpu) = event;
2000 
2001 		preempt_disable();
2002 		if (cpu == smp_processor_id() &&
2003 		    this_cpu_read(trace_buffered_event) !=
2004 		    per_cpu(trace_buffered_event, cpu))
2005 			WARN_ON_ONCE(1);
2006 		preempt_enable();
2007 	}
2008 
2009 	return;
2010  failed:
2011 	trace_buffered_event_disable();
2012 }
2013 
2014 static void enable_trace_buffered_event(void *data)
2015 {
2016 	/* Probably not needed, but do it anyway */
2017 	smp_rmb();
2018 	this_cpu_dec(trace_buffered_event_cnt);
2019 }
2020 
2021 static void disable_trace_buffered_event(void *data)
2022 {
2023 	this_cpu_inc(trace_buffered_event_cnt);
2024 }
2025 
2026 /**
2027  * trace_buffered_event_disable - disable buffering events
2028  *
2029  * When a filter is removed, it is faster to not use the buffered
2030  * events, and to commit directly into the ring buffer. Free up
2031  * the temp buffers when there are no more users. This requires
2032  * special synchronization with current events.
2033  */
2034 void trace_buffered_event_disable(void)
2035 {
2036 	int cpu;
2037 
2038 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2039 
2040 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2041 		return;
2042 
2043 	if (--trace_buffered_event_ref)
2044 		return;
2045 
2046 	preempt_disable();
2047 	/* For each CPU, set the buffer as used. */
2048 	smp_call_function_many(tracing_buffer_mask,
2049 			       disable_trace_buffered_event, NULL, 1);
2050 	preempt_enable();
2051 
2052 	/* Wait for all current users to finish */
2053 	synchronize_sched();
2054 
2055 	for_each_tracing_cpu(cpu) {
2056 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2057 		per_cpu(trace_buffered_event, cpu) = NULL;
2058 	}
2059 	/*
2060 	 * Make sure trace_buffered_event is NULL before clearing
2061 	 * trace_buffered_event_cnt.
2062 	 */
2063 	smp_wmb();
2064 
2065 	preempt_disable();
2066 	/* Do the work on each cpu */
2067 	smp_call_function_many(tracing_buffer_mask,
2068 			       enable_trace_buffered_event, NULL, 1);
2069 	preempt_enable();
2070 }
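
/*
 * Illustrative sketch, not part of this file: the enable/disable pair
 * above is reference counted and expects event_mutex to be held, as the
 * WARN_ON_ONCE() checks document.  A hypothetical caller that switches
 * event buffering on and off might look like this:
 */
static void example_toggle_event_buffering(bool on)
{
	mutex_lock(&event_mutex);
	if (on)
		trace_buffered_event_enable();
	else
		trace_buffered_event_disable();
	mutex_unlock(&event_mutex);
}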
2071 
2072 void
2073 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
2074 {
2075 	__this_cpu_write(trace_cmdline_save, true);
2076 
2077 	/* If this is the temp buffer, we need to commit fully */
2078 	if (this_cpu_read(trace_buffered_event) == event) {
2079 		/* Length is in event->array[0] */
2080 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
2081 		/* Release the temp buffer */
2082 		this_cpu_dec(trace_buffered_event_cnt);
2083 	} else
2084 		ring_buffer_unlock_commit(buffer, event);
2085 }
2086 
2087 static struct ring_buffer *temp_buffer;
2088 
2089 struct ring_buffer_event *
2090 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2091 			  struct trace_event_file *trace_file,
2092 			  int type, unsigned long len,
2093 			  unsigned long flags, int pc)
2094 {
2095 	struct ring_buffer_event *entry;
2096 	int val;
2097 
2098 	*current_rb = trace_file->tr->trace_buffer.buffer;
2099 
2100 	if ((trace_file->flags &
2101 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2102 	    (entry = this_cpu_read(trace_buffered_event))) {
2103 		/* Try to use the per cpu buffer first */
2104 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2105 		if (val == 1) {
2106 			trace_event_setup(entry, type, flags, pc);
2107 			entry->array[0] = len;
2108 			return entry;
2109 		}
2110 		this_cpu_dec(trace_buffered_event_cnt);
2111 	}
2112 
2113 	entry = trace_buffer_lock_reserve(*current_rb,
2114 					 type, len, flags, pc);
2115 	/*
2116 	 * If tracing is off, but we have triggers enabled
2117 	 * we still need to look at the event data. Use the temp_buffer
2118 	 * to store the trace event for the trigger to use. It's recursion
2119 	 * safe and will not be recorded anywhere.
2120 	 */
2121 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2122 		*current_rb = temp_buffer;
2123 		entry = trace_buffer_lock_reserve(*current_rb,
2124 						  type, len, flags, pc);
2125 	}
2126 	return entry;
2127 }
2128 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2129 
2130 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2131 				     struct ring_buffer *buffer,
2132 				     struct ring_buffer_event *event,
2133 				     unsigned long flags, int pc,
2134 				     struct pt_regs *regs)
2135 {
2136 	__buffer_unlock_commit(buffer, event);
2137 
2138 	/*
2139 	 * If regs is not set, then skip the following callers:
2140 	 *   trace_buffer_unlock_commit_regs
2141 	 *   event_trigger_unlock_commit
2142 	 *   trace_event_buffer_commit
2143 	 *   trace_event_raw_event_sched_switch
2144 	 * Note, we can still get here via blktrace, wakeup tracer
2145 	 * and mmiotrace, but that's ok if they lose a function or
2146 	 * two. They are not that meaningful.
2147 	 */
2148 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2149 	ftrace_trace_userstack(buffer, flags, pc);
2150 }
2151 
2152 void
2153 trace_function(struct trace_array *tr,
2154 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2155 	       int pc)
2156 {
2157 	struct trace_event_call *call = &event_function;
2158 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2159 	struct ring_buffer_event *event;
2160 	struct ftrace_entry *entry;
2161 
2162 	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2163 					  flags, pc);
2164 	if (!event)
2165 		return;
2166 	entry	= ring_buffer_event_data(event);
2167 	entry->ip			= ip;
2168 	entry->parent_ip		= parent_ip;
2169 
2170 	if (!call_filter_check_discard(call, entry, buffer, event))
2171 		__buffer_unlock_commit(buffer, event);
2172 }
2173 
2174 #ifdef CONFIG_STACKTRACE
2175 
2176 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2177 struct ftrace_stack {
2178 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2179 };
2180 
2181 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2182 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2183 
2184 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2185 				 unsigned long flags,
2186 				 int skip, int pc, struct pt_regs *regs)
2187 {
2188 	struct trace_event_call *call = &event_kernel_stack;
2189 	struct ring_buffer_event *event;
2190 	struct stack_entry *entry;
2191 	struct stack_trace trace;
2192 	int use_stack;
2193 	int size = FTRACE_STACK_ENTRIES;
2194 
2195 	trace.nr_entries	= 0;
2196 	trace.skip		= skip;
2197 
2198 	/*
2199 	 * Add two, for this function and the call to save_stack_trace().
2200 	 * If regs is set, then these functions will not be in the way.
2201 	 */
2202 	if (!regs)
2203 		trace.skip += 2;
2204 
2205 	/*
2206 	 * Since events can happen in NMIs there's no safe way to
2207 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2208 	 * or NMI comes in, it will just have to use the default
2209 	 * FTRACE_STACK_SIZE.
2210 	 */
2211 	preempt_disable_notrace();
2212 
2213 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2214 	/*
2215 	 * We don't need any atomic variables, just a barrier.
2216 	 * If an interrupt comes in, we don't care, because it would
2217 	 * have exited and put the counter back to what we want.
2218 	 * We just need a barrier to keep gcc from moving things
2219 	 * around.
2220 	 */
2221 	barrier();
2222 	if (use_stack == 1) {
2223 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2224 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2225 
2226 		if (regs)
2227 			save_stack_trace_regs(regs, &trace);
2228 		else
2229 			save_stack_trace(&trace);
2230 
2231 		if (trace.nr_entries > size)
2232 			size = trace.nr_entries;
2233 	} else
2234 		/* From now on, use_stack is a boolean */
2235 		use_stack = 0;
2236 
2237 	size *= sizeof(unsigned long);
2238 
2239 	event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
2240 					  sizeof(*entry) + size, flags, pc);
2241 	if (!event)
2242 		goto out;
2243 	entry = ring_buffer_event_data(event);
2244 
2245 	memset(&entry->caller, 0, size);
2246 
2247 	if (use_stack)
2248 		memcpy(&entry->caller, trace.entries,
2249 		       trace.nr_entries * sizeof(unsigned long));
2250 	else {
2251 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2252 		trace.entries		= entry->caller;
2253 		if (regs)
2254 			save_stack_trace_regs(regs, &trace);
2255 		else
2256 			save_stack_trace(&trace);
2257 	}
2258 
2259 	entry->size = trace.nr_entries;
2260 
2261 	if (!call_filter_check_discard(call, entry, buffer, event))
2262 		__buffer_unlock_commit(buffer, event);
2263 
2264  out:
2265 	/* Again, don't let gcc optimize things here */
2266 	barrier();
2267 	__this_cpu_dec(ftrace_stack_reserve);
2268 	preempt_enable_notrace();
2269 
2270 }
2271 
2272 static inline void ftrace_trace_stack(struct trace_array *tr,
2273 				      struct ring_buffer *buffer,
2274 				      unsigned long flags,
2275 				      int skip, int pc, struct pt_regs *regs)
2276 {
2277 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2278 		return;
2279 
2280 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2281 }
2282 
2283 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2284 		   int pc)
2285 {
2286 	__ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2287 }
2288 
2289 /**
2290  * trace_dump_stack - record a stack back trace in the trace buffer
2291  * @skip: Number of functions to skip (helper handlers)
2292  */
2293 void trace_dump_stack(int skip)
2294 {
2295 	unsigned long flags;
2296 
2297 	if (tracing_disabled || tracing_selftest_running)
2298 		return;
2299 
2300 	local_save_flags(flags);
2301 
2302 	/*
2303 	 * Skip 3 more frames; that seems to land us at the caller of
2304 	 * this function.
2305 	 */
2306 	skip += 3;
2307 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2308 			     flags, skip, preempt_count(), NULL);
2309 }
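
/*
 * Illustrative sketch, not part of this file: trace_dump_stack() can be
 * dropped into a suspect code path to record the current kernel stack
 * into the ring buffer instead of printing it to the console.  The
 * function below is hypothetical.
 */
static void example_record_unexpected_path(void)
{
	/* skip == 0: start the report at the caller of trace_dump_stack() */
	trace_dump_stack(0);
}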
2310 
2311 static DEFINE_PER_CPU(int, user_stack_count);
2312 
2313 void
2314 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2315 {
2316 	struct trace_event_call *call = &event_user_stack;
2317 	struct ring_buffer_event *event;
2318 	struct userstack_entry *entry;
2319 	struct stack_trace trace;
2320 
2321 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2322 		return;
2323 
2324 	/*
2325 	 * NMIs cannot handle page faults, even with fixups.
2326 	 * Saving the user stack can (and often does) fault.
2327 	 */
2328 	if (unlikely(in_nmi()))
2329 		return;
2330 
2331 	/*
2332 	 * prevent recursion, since the user stack tracing may
2333 	 * trigger other kernel events.
2334 	 */
2335 	preempt_disable();
2336 	if (__this_cpu_read(user_stack_count))
2337 		goto out;
2338 
2339 	__this_cpu_inc(user_stack_count);
2340 
2341 	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2342 					  sizeof(*entry), flags, pc);
2343 	if (!event)
2344 		goto out_drop_count;
2345 	entry	= ring_buffer_event_data(event);
2346 
2347 	entry->tgid		= current->tgid;
2348 	memset(&entry->caller, 0, sizeof(entry->caller));
2349 
2350 	trace.nr_entries	= 0;
2351 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2352 	trace.skip		= 0;
2353 	trace.entries		= entry->caller;
2354 
2355 	save_stack_trace_user(&trace);
2356 	if (!call_filter_check_discard(call, entry, buffer, event))
2357 		__buffer_unlock_commit(buffer, event);
2358 
2359  out_drop_count:
2360 	__this_cpu_dec(user_stack_count);
2361  out:
2362 	preempt_enable();
2363 }
2364 
2365 #ifdef UNUSED
2366 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2367 {
2368 	ftrace_trace_userstack(tr, flags, preempt_count());
2369 }
2370 #endif /* UNUSED */
2371 
2372 #endif /* CONFIG_STACKTRACE */
2373 
2374 /* created for use with alloc_percpu */
2375 struct trace_buffer_struct {
2376 	int nesting;
2377 	char buffer[4][TRACE_BUF_SIZE];
2378 };
2379 
2380 static struct trace_buffer_struct *trace_percpu_buffer;
2381 
2382 /*
2383  * This allows for lockless recording.  If we're nested too deeply, then
2384  * this returns NULL.
2385  */
2386 static char *get_trace_buf(void)
2387 {
2388 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2389 
2390 	if (!buffer || buffer->nesting >= 4)
2391 		return NULL;
2392 
2393 	buffer->nesting++;
2394 
2395 	/* Interrupts must see nesting incremented before we use the buffer */
2396 	barrier();
2397 	return &buffer->buffer[buffer->nesting - 1][0];	/* nesting was just bumped; slot nesting - 1 is ours */
2398 }
2399 
2400 static void put_trace_buf(void)
2401 {
2402 	/* Don't let the decrement of nesting leak before this */
2403 	barrier();
2404 	this_cpu_dec(trace_percpu_buffer->nesting);
2405 }
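
/*
 * Illustrative sketch, not part of this file: every user of
 * get_trace_buf() must run with preemption disabled, tolerate a NULL
 * return (nested more than four levels deep) and pair the call with
 * put_trace_buf(), exactly as trace_vbprintk() below does.  The helper
 * is hypothetical.
 */
static void example_use_percpu_printk_buffer(const char *msg)
{
	char *tbuf;

	preempt_disable_notrace();

	tbuf = get_trace_buf();
	if (!tbuf)
		goto out;

	snprintf(tbuf, TRACE_BUF_SIZE, "%s", msg);
	/* ... hand tbuf to the ring buffer ... */

	put_trace_buf();
out:
	preempt_enable_notrace();
}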
2406 
2407 static int alloc_percpu_trace_buffer(void)
2408 {
2409 	struct trace_buffer_struct *buffers;
2410 
2411 	buffers = alloc_percpu(struct trace_buffer_struct);
2412 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2413 		return -ENOMEM;
2414 
2415 	trace_percpu_buffer = buffers;
2416 	return 0;
2417 }
2418 
2419 static int buffers_allocated;
2420 
2421 void trace_printk_init_buffers(void)
2422 {
2423 	if (buffers_allocated)
2424 		return;
2425 
2426 	if (alloc_percpu_trace_buffer())
2427 		return;
2428 
2429 	/* trace_printk() is for debug use only. Don't use it in production. */
2430 
2431 	pr_warn("\n");
2432 	pr_warn("**********************************************************\n");
2433 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2434 	pr_warn("**                                                      **\n");
2435 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2436 	pr_warn("**                                                      **\n");
2437 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2438 	pr_warn("** unsafe for production use.                           **\n");
2439 	pr_warn("**                                                      **\n");
2440 	pr_warn("** If you see this message and you are not debugging    **\n");
2441 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2442 	pr_warn("**                                                      **\n");
2443 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2444 	pr_warn("**********************************************************\n");
2445 
2446 	/* Expand the buffers to set size */
2447 	tracing_update_buffers();
2448 
2449 	buffers_allocated = 1;
2450 
2451 	/*
2452 	 * trace_printk_init_buffers() can be called by modules.
2453 	 * If that happens, then we need to start cmdline recording
2454 	 * directly here. If the global_trace.buffer is already
2455 	 * allocated here, then this was called by module code.
2456 	 */
2457 	if (global_trace.trace_buffer.buffer)
2458 		tracing_start_cmdline_record();
2459 }
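
/*
 * Illustrative sketch, not part of this file: trace_printk() is the
 * debug-only front end of the buffers allocated above.  Its output goes
 * to the ftrace ring buffer rather than the printk log, so it is cheap
 * enough for hot paths while debugging.  The function below is
 * hypothetical.
 */
static void example_debug_hot_path(int irq, u64 delta_ns)
{
	trace_printk("irq %d serviced in %llu ns\n", irq, delta_ns);
}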
2460 
2461 void trace_printk_start_comm(void)
2462 {
2463 	/* Start tracing comms if trace printk is set */
2464 	if (!buffers_allocated)
2465 		return;
2466 	tracing_start_cmdline_record();
2467 }
2468 
2469 static void trace_printk_start_stop_comm(int enabled)
2470 {
2471 	if (!buffers_allocated)
2472 		return;
2473 
2474 	if (enabled)
2475 		tracing_start_cmdline_record();
2476 	else
2477 		tracing_stop_cmdline_record();
2478 }
2479 
2480 /**
2481  * trace_vbprintk - write binary msg to tracing buffer
2482  * trace_vbprintk - write a binary printk message into the tracing buffer
2483  */
2484 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2485 {
2486 	struct trace_event_call *call = &event_bprint;
2487 	struct ring_buffer_event *event;
2488 	struct ring_buffer *buffer;
2489 	struct trace_array *tr = &global_trace;
2490 	struct bprint_entry *entry;
2491 	unsigned long flags;
2492 	char *tbuffer;
2493 	int len = 0, size, pc;
2494 
2495 	if (unlikely(tracing_selftest_running || tracing_disabled))
2496 		return 0;
2497 
2498 	/* Don't pollute graph traces with trace_vprintk internals */
2499 	pause_graph_tracing();
2500 
2501 	pc = preempt_count();
2502 	preempt_disable_notrace();
2503 
2504 	tbuffer = get_trace_buf();
2505 	if (!tbuffer) {
2506 		len = 0;
2507 		goto out_nobuffer;
2508 	}
2509 
2510 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2511 
2512 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2513 		goto out;
2514 
2515 	local_save_flags(flags);
2516 	size = sizeof(*entry) + sizeof(u32) * len;
2517 	buffer = tr->trace_buffer.buffer;
2518 	event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2519 					  flags, pc);
2520 	if (!event)
2521 		goto out;
2522 	entry = ring_buffer_event_data(event);
2523 	entry->ip			= ip;
2524 	entry->fmt			= fmt;
2525 
2526 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2527 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2528 		__buffer_unlock_commit(buffer, event);
2529 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2530 	}
2531 
2532 out:
2533 	put_trace_buf();
2534 
2535 out_nobuffer:
2536 	preempt_enable_notrace();
2537 	unpause_graph_tracing();
2538 
2539 	return len;
2540 }
2541 EXPORT_SYMBOL_GPL(trace_vbprintk);
2542 
2543 static int
2544 __trace_array_vprintk(struct ring_buffer *buffer,
2545 		      unsigned long ip, const char *fmt, va_list args)
2546 {
2547 	struct trace_event_call *call = &event_print;
2548 	struct ring_buffer_event *event;
2549 	int len = 0, size, pc;
2550 	struct print_entry *entry;
2551 	unsigned long flags;
2552 	char *tbuffer;
2553 
2554 	if (tracing_disabled || tracing_selftest_running)
2555 		return 0;
2556 
2557 	/* Don't pollute graph traces with trace_vprintk internals */
2558 	pause_graph_tracing();
2559 
2560 	pc = preempt_count();
2561 	preempt_disable_notrace();
2562 
2563 
2564 	tbuffer = get_trace_buf();
2565 	if (!tbuffer) {
2566 		len = 0;
2567 		goto out_nobuffer;
2568 	}
2569 
2570 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2571 
2572 	local_save_flags(flags);
2573 	size = sizeof(*entry) + len + 1;
2574 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2575 					  flags, pc);
2576 	if (!event)
2577 		goto out;
2578 	entry = ring_buffer_event_data(event);
2579 	entry->ip = ip;
2580 
2581 	memcpy(&entry->buf, tbuffer, len + 1);
2582 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2583 		__buffer_unlock_commit(buffer, event);
2584 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2585 	}
2586 
2587 out:
2588 	put_trace_buf();
2589 
2590 out_nobuffer:
2591 	preempt_enable_notrace();
2592 	unpause_graph_tracing();
2593 
2594 	return len;
2595 }
2596 
2597 int trace_array_vprintk(struct trace_array *tr,
2598 			unsigned long ip, const char *fmt, va_list args)
2599 {
2600 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2601 }
2602 
2603 int trace_array_printk(struct trace_array *tr,
2604 		       unsigned long ip, const char *fmt, ...)
2605 {
2606 	int ret;
2607 	va_list ap;
2608 
2609 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2610 		return 0;
2611 
2612 	va_start(ap, fmt);
2613 	ret = trace_array_vprintk(tr, ip, fmt, ap);
2614 	va_end(ap);
2615 	return ret;
2616 }
2617 
2618 int trace_array_printk_buf(struct ring_buffer *buffer,
2619 			   unsigned long ip, const char *fmt, ...)
2620 {
2621 	int ret;
2622 	va_list ap;
2623 
2624 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2625 		return 0;
2626 
2627 	va_start(ap, fmt);
2628 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2629 	va_end(ap);
2630 	return ret;
2631 }
2632 
2633 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2634 {
2635 	return trace_array_vprintk(&global_trace, ip, fmt, args);
2636 }
2637 EXPORT_SYMBOL_GPL(trace_vprintk);
2638 
2639 static void trace_iterator_increment(struct trace_iterator *iter)
2640 {
2641 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2642 
2643 	iter->idx++;
2644 	if (buf_iter)
2645 		ring_buffer_read(buf_iter, NULL);
2646 }
2647 
2648 static struct trace_entry *
2649 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2650 		unsigned long *lost_events)
2651 {
2652 	struct ring_buffer_event *event;
2653 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2654 
2655 	if (buf_iter)
2656 		event = ring_buffer_iter_peek(buf_iter, ts);
2657 	else
2658 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2659 					 lost_events);
2660 
2661 	if (event) {
2662 		iter->ent_size = ring_buffer_event_length(event);
2663 		return ring_buffer_event_data(event);
2664 	}
2665 	iter->ent_size = 0;
2666 	return NULL;
2667 }
2668 
2669 static struct trace_entry *
2670 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2671 		  unsigned long *missing_events, u64 *ent_ts)
2672 {
2673 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
2674 	struct trace_entry *ent, *next = NULL;
2675 	unsigned long lost_events = 0, next_lost = 0;
2676 	int cpu_file = iter->cpu_file;
2677 	u64 next_ts = 0, ts;
2678 	int next_cpu = -1;
2679 	int next_size = 0;
2680 	int cpu;
2681 
2682 	/*
2683 	 * If we are in a per_cpu trace file, don't bother iterating over
2684 	 * all CPUs; peek at that one directly.
2685 	 */
2686 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
2687 		if (ring_buffer_empty_cpu(buffer, cpu_file))
2688 			return NULL;
2689 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2690 		if (ent_cpu)
2691 			*ent_cpu = cpu_file;
2692 
2693 		return ent;
2694 	}
2695 
2696 	for_each_tracing_cpu(cpu) {
2697 
2698 		if (ring_buffer_empty_cpu(buffer, cpu))
2699 			continue;
2700 
2701 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2702 
2703 		/*
2704 		 * Pick the entry with the smallest timestamp:
2705 		 */
2706 		if (ent && (!next || ts < next_ts)) {
2707 			next = ent;
2708 			next_cpu = cpu;
2709 			next_ts = ts;
2710 			next_lost = lost_events;
2711 			next_size = iter->ent_size;
2712 		}
2713 	}
2714 
2715 	iter->ent_size = next_size;
2716 
2717 	if (ent_cpu)
2718 		*ent_cpu = next_cpu;
2719 
2720 	if (ent_ts)
2721 		*ent_ts = next_ts;
2722 
2723 	if (missing_events)
2724 		*missing_events = next_lost;
2725 
2726 	return next;
2727 }
2728 
2729 /* Find the next real entry, without updating the iterator itself */
2730 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2731 					  int *ent_cpu, u64 *ent_ts)
2732 {
2733 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2734 }
2735 
2736 /* Find the next real entry, and increment the iterator to the next entry */
2737 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2738 {
2739 	iter->ent = __find_next_entry(iter, &iter->cpu,
2740 				      &iter->lost_events, &iter->ts);
2741 
2742 	if (iter->ent)
2743 		trace_iterator_increment(iter);
2744 
2745 	return iter->ent ? iter : NULL;
2746 }
2747 
2748 static void trace_consume(struct trace_iterator *iter)
2749 {
2750 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2751 			    &iter->lost_events);
2752 }
2753 
2754 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2755 {
2756 	struct trace_iterator *iter = m->private;
2757 	int i = (int)*pos;
2758 	void *ent;
2759 
2760 	WARN_ON_ONCE(iter->leftover);
2761 
2762 	(*pos)++;
2763 
2764 	/* can't go backwards */
2765 	if (iter->idx > i)
2766 		return NULL;
2767 
2768 	if (iter->idx < 0)
2769 		ent = trace_find_next_entry_inc(iter);
2770 	else
2771 		ent = iter;
2772 
2773 	while (ent && iter->idx < i)
2774 		ent = trace_find_next_entry_inc(iter);
2775 
2776 	iter->pos = *pos;
2777 
2778 	return ent;
2779 }
2780 
2781 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2782 {
2783 	struct ring_buffer_event *event;
2784 	struct ring_buffer_iter *buf_iter;
2785 	unsigned long entries = 0;
2786 	u64 ts;
2787 
2788 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2789 
2790 	buf_iter = trace_buffer_iter(iter, cpu);
2791 	if (!buf_iter)
2792 		return;
2793 
2794 	ring_buffer_iter_reset(buf_iter);
2795 
2796 	/*
2797 	 * We could have the case with the max latency tracers
2798 	 * that a reset never took place on a cpu. This is evident
2799 	 * by the timestamp being before the start of the buffer.
2800 	 */
2801 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2802 		if (ts >= iter->trace_buffer->time_start)
2803 			break;
2804 		entries++;
2805 		ring_buffer_read(buf_iter, NULL);
2806 	}
2807 
2808 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2809 }
2810 
2811 /*
2812  * The current tracer is copied to avoid taking a global lock
2813  * all around.
2814  */
2815 static void *s_start(struct seq_file *m, loff_t *pos)
2816 {
2817 	struct trace_iterator *iter = m->private;
2818 	struct trace_array *tr = iter->tr;
2819 	int cpu_file = iter->cpu_file;
2820 	void *p = NULL;
2821 	loff_t l = 0;
2822 	int cpu;
2823 
2824 	/*
2825 	 * copy the tracer to avoid using a global lock all around.
2826 	 * iter->trace is a copy of current_trace, the pointer to the
2827 	 * name may be used instead of a strcmp(), as iter->trace->name
2828 	 * will point to the same string as current_trace->name.
2829 	 */
2830 	mutex_lock(&trace_types_lock);
2831 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2832 		*iter->trace = *tr->current_trace;
2833 	mutex_unlock(&trace_types_lock);
2834 
2835 #ifdef CONFIG_TRACER_MAX_TRACE
2836 	if (iter->snapshot && iter->trace->use_max_tr)
2837 		return ERR_PTR(-EBUSY);
2838 #endif
2839 
2840 	if (!iter->snapshot)
2841 		atomic_inc(&trace_record_cmdline_disabled);
2842 
2843 	if (*pos != iter->pos) {
2844 		iter->ent = NULL;
2845 		iter->cpu = 0;
2846 		iter->idx = -1;
2847 
2848 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
2849 			for_each_tracing_cpu(cpu)
2850 				tracing_iter_reset(iter, cpu);
2851 		} else
2852 			tracing_iter_reset(iter, cpu_file);
2853 
2854 		iter->leftover = 0;
2855 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2856 			;
2857 
2858 	} else {
2859 		/*
2860 		 * If we overflowed the seq_file before, then we want
2861 		 * to just reuse the trace_seq buffer again.
2862 		 */
2863 		if (iter->leftover)
2864 			p = iter;
2865 		else {
2866 			l = *pos - 1;
2867 			p = s_next(m, p, &l);
2868 		}
2869 	}
2870 
2871 	trace_event_read_lock();
2872 	trace_access_lock(cpu_file);
2873 	return p;
2874 }
2875 
2876 static void s_stop(struct seq_file *m, void *p)
2877 {
2878 	struct trace_iterator *iter = m->private;
2879 
2880 #ifdef CONFIG_TRACER_MAX_TRACE
2881 	if (iter->snapshot && iter->trace->use_max_tr)
2882 		return;
2883 #endif
2884 
2885 	if (!iter->snapshot)
2886 		atomic_dec(&trace_record_cmdline_disabled);
2887 
2888 	trace_access_unlock(iter->cpu_file);
2889 	trace_event_read_unlock();
2890 }
2891 
2892 static void
2893 get_total_entries(struct trace_buffer *buf,
2894 		  unsigned long *total, unsigned long *entries)
2895 {
2896 	unsigned long count;
2897 	int cpu;
2898 
2899 	*total = 0;
2900 	*entries = 0;
2901 
2902 	for_each_tracing_cpu(cpu) {
2903 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
2904 		/*
2905 		 * If this buffer has skipped entries, then we hold all
2906 		 * entries for the trace and we need to ignore the
2907 		 * ones before the time stamp.
2908 		 */
2909 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2910 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2911 			/* total is the same as the entries */
2912 			*total += count;
2913 		} else
2914 			*total += count +
2915 				ring_buffer_overrun_cpu(buf->buffer, cpu);
2916 		*entries += count;
2917 	}
2918 }
2919 
2920 static void print_lat_help_header(struct seq_file *m)
2921 {
2922 	seq_puts(m, "#                  _------=> CPU#            \n"
2923 		    "#                 / _-----=> irqs-off        \n"
2924 		    "#                | / _----=> need-resched    \n"
2925 		    "#                || / _---=> hardirq/softirq \n"
2926 		    "#                ||| / _--=> preempt-depth   \n"
2927 		    "#                |||| /     delay            \n"
2928 		    "#  cmd     pid   ||||| time  |   caller      \n"
2929 		    "#     \\   /      |||||  \\    |   /         \n");
2930 }
2931 
2932 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2933 {
2934 	unsigned long total;
2935 	unsigned long entries;
2936 
2937 	get_total_entries(buf, &total, &entries);
2938 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2939 		   entries, total, num_online_cpus());
2940 	seq_puts(m, "#\n");
2941 }
2942 
2943 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2944 {
2945 	print_event_info(buf, m);
2946 	seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2947 		    "#              | |       |          |         |\n");
2948 }
2949 
2950 static void print_func_help_header_tgid(struct trace_buffer *buf, struct seq_file *m)
2951 {
2952 	print_event_info(buf, m);
2953 	seq_puts(m, "#           TASK-PID    TGID   CPU#      TIMESTAMP  FUNCTION\n");
2954 	seq_puts(m, "#              | |        |      |          |         |\n");
2955 }
2956 
2957 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2958 {
2959 	print_event_info(buf, m);
2960 	seq_puts(m, "#                              _-----=> irqs-off\n"
2961 		    "#                             / _----=> need-resched\n"
2962 		    "#                            | / _---=> hardirq/softirq\n"
2963 		    "#                            || / _--=> preempt-depth\n"
2964 		    "#                            ||| /     delay\n"
2965 		    "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2966 		    "#              | |       |   ||||       |         |\n");
2967 }
2968 
2969 static void print_func_help_header_irq_tgid(struct trace_buffer *buf, struct seq_file *m)
2970 {
2971 	print_event_info(buf, m);
2972 	seq_puts(m, "#                                      _-----=> irqs-off\n");
2973 	seq_puts(m, "#                                     / _----=> need-resched\n");
2974 	seq_puts(m, "#                                    | / _---=> hardirq/softirq\n");
2975 	seq_puts(m, "#                                    || / _--=> preempt-depth\n");
2976 	seq_puts(m, "#                                    ||| /     delay\n");
2977 	seq_puts(m, "#           TASK-PID    TGID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2978 	seq_puts(m, "#              | |        |      |   ||||       |         |\n");
2979 }
2980 
2981 void
2982 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2983 {
2984 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2985 	struct trace_buffer *buf = iter->trace_buffer;
2986 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2987 	struct tracer *type = iter->trace;
2988 	unsigned long entries;
2989 	unsigned long total;
2990 	const char *name = "preemption";
2991 
2992 	name = type->name;
2993 
2994 	get_total_entries(buf, &total, &entries);
2995 
2996 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2997 		   name, UTS_RELEASE);
2998 	seq_puts(m, "# -----------------------------------"
2999 		 "---------------------------------\n");
3000 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3001 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3002 		   nsecs_to_usecs(data->saved_latency),
3003 		   entries,
3004 		   total,
3005 		   buf->cpu,
3006 #if defined(CONFIG_PREEMPT_NONE)
3007 		   "server",
3008 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3009 		   "desktop",
3010 #elif defined(CONFIG_PREEMPT)
3011 		   "preempt",
3012 #else
3013 		   "unknown",
3014 #endif
3015 		   /* These are reserved for later use */
3016 		   0, 0, 0, 0);
3017 #ifdef CONFIG_SMP
3018 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3019 #else
3020 	seq_puts(m, ")\n");
3021 #endif
3022 	seq_puts(m, "#    -----------------\n");
3023 	seq_printf(m, "#    | task: %.16s-%d "
3024 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3025 		   data->comm, data->pid,
3026 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3027 		   data->policy, data->rt_priority);
3028 	seq_puts(m, "#    -----------------\n");
3029 
3030 	if (data->critical_start) {
3031 		seq_puts(m, "#  => started at: ");
3032 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3033 		trace_print_seq(m, &iter->seq);
3034 		seq_puts(m, "\n#  => ended at:   ");
3035 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3036 		trace_print_seq(m, &iter->seq);
3037 		seq_puts(m, "\n#\n");
3038 	}
3039 
3040 	seq_puts(m, "#\n");
3041 }
3042 
3043 static void test_cpu_buff_start(struct trace_iterator *iter)
3044 {
3045 	struct trace_seq *s = &iter->seq;
3046 	struct trace_array *tr = iter->tr;
3047 
3048 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3049 		return;
3050 
3051 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3052 		return;
3053 
3054 	if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
3055 		return;
3056 
3057 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3058 		return;
3059 
3060 	if (iter->started)
3061 		cpumask_set_cpu(iter->cpu, iter->started);
3062 
3063 	/* Don't print started cpu buffer for the first entry of the trace */
3064 	if (iter->idx > 1)
3065 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3066 				iter->cpu);
3067 }
3068 
3069 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3070 {
3071 	struct trace_array *tr = iter->tr;
3072 	struct trace_seq *s = &iter->seq;
3073 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3074 	struct trace_entry *entry;
3075 	struct trace_event *event;
3076 
3077 	entry = iter->ent;
3078 
3079 	test_cpu_buff_start(iter);
3080 
3081 	event = ftrace_find_event(entry->type);
3082 
3083 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3084 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3085 			trace_print_lat_context(iter);
3086 		else
3087 			trace_print_context(iter);
3088 	}
3089 
3090 	if (trace_seq_has_overflowed(s))
3091 		return TRACE_TYPE_PARTIAL_LINE;
3092 
3093 	if (event)
3094 		return event->funcs->trace(iter, sym_flags, event);
3095 
3096 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3097 
3098 	return trace_handle_return(s);
3099 }
3100 
3101 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3102 {
3103 	struct trace_array *tr = iter->tr;
3104 	struct trace_seq *s = &iter->seq;
3105 	struct trace_entry *entry;
3106 	struct trace_event *event;
3107 
3108 	entry = iter->ent;
3109 
3110 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3111 		trace_seq_printf(s, "%d %d %llu ",
3112 				 entry->pid, iter->cpu, iter->ts);
3113 
3114 	if (trace_seq_has_overflowed(s))
3115 		return TRACE_TYPE_PARTIAL_LINE;
3116 
3117 	event = ftrace_find_event(entry->type);
3118 	if (event)
3119 		return event->funcs->raw(iter, 0, event);
3120 
3121 	trace_seq_printf(s, "%d ?\n", entry->type);
3122 
3123 	return trace_handle_return(s);
3124 }
3125 
3126 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3127 {
3128 	struct trace_array *tr = iter->tr;
3129 	struct trace_seq *s = &iter->seq;
3130 	unsigned char newline = '\n';
3131 	struct trace_entry *entry;
3132 	struct trace_event *event;
3133 
3134 	entry = iter->ent;
3135 
3136 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3137 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3138 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3139 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3140 		if (trace_seq_has_overflowed(s))
3141 			return TRACE_TYPE_PARTIAL_LINE;
3142 	}
3143 
3144 	event = ftrace_find_event(entry->type);
3145 	if (event) {
3146 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3147 		if (ret != TRACE_TYPE_HANDLED)
3148 			return ret;
3149 	}
3150 
3151 	SEQ_PUT_FIELD(s, newline);
3152 
3153 	return trace_handle_return(s);
3154 }
3155 
3156 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3157 {
3158 	struct trace_array *tr = iter->tr;
3159 	struct trace_seq *s = &iter->seq;
3160 	struct trace_entry *entry;
3161 	struct trace_event *event;
3162 
3163 	entry = iter->ent;
3164 
3165 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3166 		SEQ_PUT_FIELD(s, entry->pid);
3167 		SEQ_PUT_FIELD(s, iter->cpu);
3168 		SEQ_PUT_FIELD(s, iter->ts);
3169 		if (trace_seq_has_overflowed(s))
3170 			return TRACE_TYPE_PARTIAL_LINE;
3171 	}
3172 
3173 	event = ftrace_find_event(entry->type);
3174 	return event ? event->funcs->binary(iter, 0, event) :
3175 		TRACE_TYPE_HANDLED;
3176 }
3177 
3178 int trace_empty(struct trace_iterator *iter)
3179 {
3180 	struct ring_buffer_iter *buf_iter;
3181 	int cpu;
3182 
3183 	/* If we are looking at one CPU buffer, only check that one */
3184 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3185 		cpu = iter->cpu_file;
3186 		buf_iter = trace_buffer_iter(iter, cpu);
3187 		if (buf_iter) {
3188 			if (!ring_buffer_iter_empty(buf_iter))
3189 				return 0;
3190 		} else {
3191 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3192 				return 0;
3193 		}
3194 		return 1;
3195 	}
3196 
3197 	for_each_tracing_cpu(cpu) {
3198 		buf_iter = trace_buffer_iter(iter, cpu);
3199 		if (buf_iter) {
3200 			if (!ring_buffer_iter_empty(buf_iter))
3201 				return 0;
3202 		} else {
3203 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3204 				return 0;
3205 		}
3206 	}
3207 
3208 	return 1;
3209 }
3210 
3211 /*  Called with trace_event_read_lock() held. */
3212 enum print_line_t print_trace_line(struct trace_iterator *iter)
3213 {
3214 	struct trace_array *tr = iter->tr;
3215 	unsigned long trace_flags = tr->trace_flags;
3216 	enum print_line_t ret;
3217 
3218 	if (iter->lost_events) {
3219 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3220 				 iter->cpu, iter->lost_events);
3221 		if (trace_seq_has_overflowed(&iter->seq))
3222 			return TRACE_TYPE_PARTIAL_LINE;
3223 	}
3224 
3225 	if (iter->trace && iter->trace->print_line) {
3226 		ret = iter->trace->print_line(iter);
3227 		if (ret != TRACE_TYPE_UNHANDLED)
3228 			return ret;
3229 	}
3230 
3231 	if (iter->ent->type == TRACE_BPUTS &&
3232 			trace_flags & TRACE_ITER_PRINTK &&
3233 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3234 		return trace_print_bputs_msg_only(iter);
3235 
3236 	if (iter->ent->type == TRACE_BPRINT &&
3237 			trace_flags & TRACE_ITER_PRINTK &&
3238 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3239 		return trace_print_bprintk_msg_only(iter);
3240 
3241 	if (iter->ent->type == TRACE_PRINT &&
3242 			trace_flags & TRACE_ITER_PRINTK &&
3243 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3244 		return trace_print_printk_msg_only(iter);
3245 
3246 	if (trace_flags & TRACE_ITER_BIN)
3247 		return print_bin_fmt(iter);
3248 
3249 	if (trace_flags & TRACE_ITER_HEX)
3250 		return print_hex_fmt(iter);
3251 
3252 	if (trace_flags & TRACE_ITER_RAW)
3253 		return print_raw_fmt(iter);
3254 
3255 	return print_trace_fmt(iter);
3256 }
3257 
3258 void trace_latency_header(struct seq_file *m)
3259 {
3260 	struct trace_iterator *iter = m->private;
3261 	struct trace_array *tr = iter->tr;
3262 
3263 	/* print nothing if the buffers are empty */
3264 	if (trace_empty(iter))
3265 		return;
3266 
3267 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3268 		print_trace_header(m, iter);
3269 
3270 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3271 		print_lat_help_header(m);
3272 }
3273 
3274 void trace_default_header(struct seq_file *m)
3275 {
3276 	struct trace_iterator *iter = m->private;
3277 	struct trace_array *tr = iter->tr;
3278 	unsigned long trace_flags = tr->trace_flags;
3279 
3280 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3281 		return;
3282 
3283 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3284 		/* print nothing if the buffers are empty */
3285 		if (trace_empty(iter))
3286 			return;
3287 		print_trace_header(m, iter);
3288 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3289 			print_lat_help_header(m);
3290 	} else {
3291 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3292 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3293 				if (trace_flags & TRACE_ITER_TGID)
3294 					print_func_help_header_irq_tgid(iter->trace_buffer, m);
3295 				else
3296 					print_func_help_header_irq(iter->trace_buffer, m);
3297 			else
3298 				if (trace_flags & TRACE_ITER_TGID)
3299 					print_func_help_header_tgid(iter->trace_buffer, m);
3300 				else
3301 					print_func_help_header(iter->trace_buffer, m);
3302 		}
3303 	}
3304 }
3305 
3306 static void test_ftrace_alive(struct seq_file *m)
3307 {
3308 	if (!ftrace_is_dead())
3309 		return;
3310 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3311 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3312 }
3313 
3314 #ifdef CONFIG_TRACER_MAX_TRACE
3315 static void show_snapshot_main_help(struct seq_file *m)
3316 {
3317 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3318 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3319 		    "#                      Takes a snapshot of the main buffer.\n"
3320 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3321 		    "#                      (Doesn't have to be '2', works with any number that\n"
3322 		    "#                       is not a '0' or '1')\n");
3323 }
3324 
3325 static void show_snapshot_percpu_help(struct seq_file *m)
3326 {
3327 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3328 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3329 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3330 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3331 #else
3332 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3333 		    "#                     Must use main snapshot file to allocate.\n");
3334 #endif
3335 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3336 		    "#                      (Doesn't have to be '2', works with any number that\n"
3337 		    "#                       is not a '0' or '1')\n");
3338 }
3339 
3340 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3341 {
3342 	if (iter->tr->allocated_snapshot)
3343 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3344 	else
3345 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3346 
3347 	seq_puts(m, "# Snapshot commands:\n");
3348 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3349 		show_snapshot_main_help(m);
3350 	else
3351 		show_snapshot_percpu_help(m);
3352 }
3353 #else
3354 /* Should never be called */
3355 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3356 #endif
3357 
3358 static int s_show(struct seq_file *m, void *v)
3359 {
3360 	struct trace_iterator *iter = v;
3361 	int ret;
3362 
3363 	if (iter->ent == NULL) {
3364 		if (iter->tr) {
3365 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3366 			seq_puts(m, "#\n");
3367 			test_ftrace_alive(m);
3368 		}
3369 		if (iter->snapshot && trace_empty(iter))
3370 			print_snapshot_help(m, iter);
3371 		else if (iter->trace && iter->trace->print_header)
3372 			iter->trace->print_header(m);
3373 		else
3374 			trace_default_header(m);
3375 
3376 	} else if (iter->leftover) {
3377 		/*
3378 		 * If we filled the seq_file buffer earlier, we
3379 		 * want to just show it now.
3380 		 */
3381 		ret = trace_print_seq(m, &iter->seq);
3382 
3383 		/* ret should this time be zero, but you never know */
3384 		iter->leftover = ret;
3385 
3386 	} else {
3387 		print_trace_line(iter);
3388 		ret = trace_print_seq(m, &iter->seq);
3389 		/*
3390 		 * If we overflow the seq_file buffer, then it will
3391 		 * ask us for this data again at start up.
3392 		 * Use that instead.
3393 		 *  ret is 0 if seq_file write succeeded.
3394 		 *        -1 otherwise.
3395 		 */
3396 		iter->leftover = ret;
3397 	}
3398 
3399 	return 0;
3400 }
3401 
3402 /*
3403  * Should be used after trace_array_get(), trace_types_lock
3404  * ensures that i_cdev was already initialized.
3405  */
3406 static inline int tracing_get_cpu(struct inode *inode)
3407 {
3408 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3409 		return (long)inode->i_cdev - 1;
3410 	return RING_BUFFER_ALL_CPUS;
3411 }
3412 
3413 static const struct seq_operations tracer_seq_ops = {
3414 	.start		= s_start,
3415 	.next		= s_next,
3416 	.stop		= s_stop,
3417 	.show		= s_show,
3418 };
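
/*
 * Illustrative sketch, not part of this file: tracer_seq_ops above plugs
 * into the generic seq_file machinery.  For each read() the core calls
 * ->start() once, alternates ->show()/->next() until the iterator ends
 * or the output buffer fills, then calls ->stop().  A minimal
 * implementation of the same interface, with every name hypothetical,
 * looks like this:
 */
static void *example_seq_start(struct seq_file *m, loff_t *pos)
{
	return *pos < 3 ? pos : NULL;	/* three records: 0, 1, 2 */
}

static void *example_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	++*pos;
	return *pos < 3 ? pos : NULL;
}

static void example_seq_stop(struct seq_file *m, void *v)
{
}

static int example_seq_show(struct seq_file *m, void *v)
{
	seq_printf(m, "record %lld\n", *(loff_t *)v);
	return 0;
}

static const struct seq_operations example_seq_ops = {
	.start	= example_seq_start,
	.next	= example_seq_next,
	.stop	= example_seq_stop,
	.show	= example_seq_show,
};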
3419 
3420 static struct trace_iterator *
3421 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3422 {
3423 	struct trace_array *tr = inode->i_private;
3424 	struct trace_iterator *iter;
3425 	int cpu;
3426 
3427 	if (tracing_disabled)
3428 		return ERR_PTR(-ENODEV);
3429 
3430 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3431 	if (!iter)
3432 		return ERR_PTR(-ENOMEM);
3433 
3434 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3435 				    GFP_KERNEL);
3436 	if (!iter->buffer_iter)
3437 		goto release;
3438 
3439 	/*
3440 	 * We make a copy of the current tracer to avoid concurrent
3441 	 * changes on it while we are reading.
3442 	 */
3443 	mutex_lock(&trace_types_lock);
3444 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3445 	if (!iter->trace)
3446 		goto fail;
3447 
3448 	*iter->trace = *tr->current_trace;
3449 
3450 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3451 		goto fail;
3452 
3453 	iter->tr = tr;
3454 
3455 #ifdef CONFIG_TRACER_MAX_TRACE
3456 	/* Currently only the top directory has a snapshot */
3457 	if (tr->current_trace->print_max || snapshot)
3458 		iter->trace_buffer = &tr->max_buffer;
3459 	else
3460 #endif
3461 		iter->trace_buffer = &tr->trace_buffer;
3462 	iter->snapshot = snapshot;
3463 	iter->pos = -1;
3464 	iter->cpu_file = tracing_get_cpu(inode);
3465 	mutex_init(&iter->mutex);
3466 
3467 	/* Notify the tracer early; before we stop tracing. */
3468 	if (iter->trace && iter->trace->open)
3469 		iter->trace->open(iter);
3470 
3471 	/* Annotate start of buffers if we had overruns */
3472 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3473 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3474 
3475 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3476 	if (trace_clocks[tr->clock_id].in_ns)
3477 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3478 
3479 	/* stop the trace while dumping if we are not opening "snapshot" */
3480 	if (!iter->snapshot)
3481 		tracing_stop_tr(tr);
3482 
3483 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3484 		for_each_tracing_cpu(cpu) {
3485 			iter->buffer_iter[cpu] =
3486 				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3487 		}
3488 		ring_buffer_read_prepare_sync();
3489 		for_each_tracing_cpu(cpu) {
3490 			ring_buffer_read_start(iter->buffer_iter[cpu]);
3491 			tracing_iter_reset(iter, cpu);
3492 		}
3493 	} else {
3494 		cpu = iter->cpu_file;
3495 		iter->buffer_iter[cpu] =
3496 			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3497 		ring_buffer_read_prepare_sync();
3498 		ring_buffer_read_start(iter->buffer_iter[cpu]);
3499 		tracing_iter_reset(iter, cpu);
3500 	}
3501 
3502 	mutex_unlock(&trace_types_lock);
3503 
3504 	return iter;
3505 
3506  fail:
3507 	mutex_unlock(&trace_types_lock);
3508 	kfree(iter->trace);
3509 	kfree(iter->buffer_iter);
3510 release:
3511 	seq_release_private(inode, file);
3512 	return ERR_PTR(-ENOMEM);
3513 }
3514 
3515 int tracing_open_generic(struct inode *inode, struct file *filp)
3516 {
3517 	if (tracing_disabled)
3518 		return -ENODEV;
3519 
3520 	filp->private_data = inode->i_private;
3521 	return 0;
3522 }
3523 
3524 bool tracing_is_disabled(void)
3525 {
3526 	return (tracing_disabled) ? true : false;
3527 }
3528 
3529 /*
3530  * Open and update trace_array ref count.
3531  * Must have the current trace_array passed to it.
3532  */
3533 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3534 {
3535 	struct trace_array *tr = inode->i_private;
3536 
3537 	if (tracing_disabled)
3538 		return -ENODEV;
3539 
3540 	if (trace_array_get(tr) < 0)
3541 		return -ENODEV;
3542 
3543 	filp->private_data = inode->i_private;
3544 
3545 	return 0;
3546 }
3547 
3548 static int tracing_release(struct inode *inode, struct file *file)
3549 {
3550 	struct trace_array *tr = inode->i_private;
3551 	struct seq_file *m = file->private_data;
3552 	struct trace_iterator *iter;
3553 	int cpu;
3554 
3555 	if (!(file->f_mode & FMODE_READ)) {
3556 		trace_array_put(tr);
3557 		return 0;
3558 	}
3559 
3560 	/* Writes do not use seq_file */
3561 	iter = m->private;
3562 	mutex_lock(&trace_types_lock);
3563 
3564 	for_each_tracing_cpu(cpu) {
3565 		if (iter->buffer_iter[cpu])
3566 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3567 	}
3568 
3569 	if (iter->trace && iter->trace->close)
3570 		iter->trace->close(iter);
3571 
3572 	if (!iter->snapshot)
3573 		/* reenable tracing if it was previously enabled */
3574 		tracing_start_tr(tr);
3575 
3576 	__trace_array_put(tr);
3577 
3578 	mutex_unlock(&trace_types_lock);
3579 
3580 	mutex_destroy(&iter->mutex);
3581 	free_cpumask_var(iter->started);
3582 	kfree(iter->trace);
3583 	kfree(iter->buffer_iter);
3584 	seq_release_private(inode, file);
3585 
3586 	return 0;
3587 }
3588 
3589 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3590 {
3591 	struct trace_array *tr = inode->i_private;
3592 
3593 	trace_array_put(tr);
3594 	return 0;
3595 }
3596 
3597 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3598 {
3599 	struct trace_array *tr = inode->i_private;
3600 
3601 	trace_array_put(tr);
3602 
3603 	return single_release(inode, file);
3604 }
3605 
3606 static int tracing_open(struct inode *inode, struct file *file)
3607 {
3608 	struct trace_array *tr = inode->i_private;
3609 	struct trace_iterator *iter;
3610 	int ret = 0;
3611 
3612 	if (trace_array_get(tr) < 0)
3613 		return -ENODEV;
3614 
3615 	/* If this file was open for write, then erase contents */
3616 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3617 		int cpu = tracing_get_cpu(inode);
3618 		struct trace_buffer *trace_buf = &tr->trace_buffer;
3619 
3620 #ifdef CONFIG_TRACER_MAX_TRACE
3621 		if (tr->current_trace->print_max)
3622 			trace_buf = &tr->max_buffer;
3623 #endif
3624 
3625 		if (cpu == RING_BUFFER_ALL_CPUS)
3626 			tracing_reset_online_cpus(trace_buf);
3627 		else
3628 			tracing_reset(trace_buf, cpu);
3629 	}
3630 
3631 	if (file->f_mode & FMODE_READ) {
3632 		iter = __tracing_open(inode, file, false);
3633 		if (IS_ERR(iter))
3634 			ret = PTR_ERR(iter);
3635 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3636 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
3637 	}
3638 
3639 	if (ret < 0)
3640 		trace_array_put(tr);
3641 
3642 	return ret;
3643 }
3644 
3645 /*
3646  * Some tracers are not suitable for instance buffers.
3647  * A tracer is always available for the global array (toplevel)
3648  * or if it explicitly states that it is.
3649  */
3650 static bool
3651 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3652 {
3653 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3654 }
3655 
3656 /* Find the next tracer that this trace array may use */
3657 static struct tracer *
3658 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3659 {
3660 	while (t && !trace_ok_for_array(t, tr))
3661 		t = t->next;
3662 
3663 	return t;
3664 }
3665 
3666 static void *
3667 t_next(struct seq_file *m, void *v, loff_t *pos)
3668 {
3669 	struct trace_array *tr = m->private;
3670 	struct tracer *t = v;
3671 
3672 	(*pos)++;
3673 
3674 	if (t)
3675 		t = get_tracer_for_array(tr, t->next);
3676 
3677 	return t;
3678 }
3679 
3680 static void *t_start(struct seq_file *m, loff_t *pos)
3681 {
3682 	struct trace_array *tr = m->private;
3683 	struct tracer *t;
3684 	loff_t l = 0;
3685 
3686 	mutex_lock(&trace_types_lock);
3687 
3688 	t = get_tracer_for_array(tr, trace_types);
3689 	for (; t && l < *pos; t = t_next(m, t, &l))
3690 			;
3691 
3692 	return t;
3693 }
3694 
3695 static void t_stop(struct seq_file *m, void *p)
3696 {
3697 	mutex_unlock(&trace_types_lock);
3698 }
3699 
3700 static int t_show(struct seq_file *m, void *v)
3701 {
3702 	struct tracer *t = v;
3703 
3704 	if (!t)
3705 		return 0;
3706 
3707 	seq_puts(m, t->name);
3708 	if (t->next)
3709 		seq_putc(m, ' ');
3710 	else
3711 		seq_putc(m, '\n');
3712 
3713 	return 0;
3714 }
3715 
3716 static const struct seq_operations show_traces_seq_ops = {
3717 	.start		= t_start,
3718 	.next		= t_next,
3719 	.stop		= t_stop,
3720 	.show		= t_show,
3721 };
3722 
3723 static int show_traces_open(struct inode *inode, struct file *file)
3724 {
3725 	struct trace_array *tr = inode->i_private;
3726 	struct seq_file *m;
3727 	int ret;
3728 
3729 	if (tracing_disabled)
3730 		return -ENODEV;
3731 
3732 	ret = seq_open(file, &show_traces_seq_ops);
3733 	if (ret)
3734 		return ret;
3735 
3736 	m = file->private_data;
3737 	m->private = tr;
3738 
3739 	return 0;
3740 }
3741 
3742 static ssize_t
3743 tracing_write_stub(struct file *filp, const char __user *ubuf,
3744 		   size_t count, loff_t *ppos)
3745 {
3746 	return count;
3747 }
3748 
3749 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3750 {
3751 	int ret;
3752 
3753 	if (file->f_mode & FMODE_READ)
3754 		ret = seq_lseek(file, offset, whence);
3755 	else
3756 		file->f_pos = ret = 0;
3757 
3758 	return ret;
3759 }
3760 
3761 static const struct file_operations tracing_fops = {
3762 	.open		= tracing_open,
3763 	.read		= seq_read,
3764 	.write		= tracing_write_stub,
3765 	.llseek		= tracing_lseek,
3766 	.release	= tracing_release,
3767 };
3768 
3769 static const struct file_operations show_traces_fops = {
3770 	.open		= show_traces_open,
3771 	.read		= seq_read,
3772 	.release	= seq_release,
3773 	.llseek		= seq_lseek,
3774 };
3775 
3776 static ssize_t
3777 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3778 		     size_t count, loff_t *ppos)
3779 {
3780 	struct trace_array *tr = file_inode(filp)->i_private;
3781 	char *mask_str;
3782 	int len;
3783 
3784 	len = snprintf(NULL, 0, "%*pb\n",
3785 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
3786 	mask_str = kmalloc(len, GFP_KERNEL);
3787 	if (!mask_str)
3788 		return -ENOMEM;
3789 
3790 	len = snprintf(mask_str, len, "%*pb\n",
3791 		       cpumask_pr_args(tr->tracing_cpumask));
3792 	if (len >= count) {
3793 		count = -EINVAL;
3794 		goto out_err;
3795 	}
3796 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
3797 
3798 out_err:
3799 	kfree(mask_str);
3800 
3801 	return count;
3802 }
3803 
3804 static ssize_t
3805 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3806 		      size_t count, loff_t *ppos)
3807 {
3808 	struct trace_array *tr = file_inode(filp)->i_private;
3809 	cpumask_var_t tracing_cpumask_new;
3810 	int err, cpu;
3811 
3812 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3813 		return -ENOMEM;
3814 
3815 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3816 	if (err)
3817 		goto err_unlock;
3818 
3819 	local_irq_disable();
3820 	arch_spin_lock(&tr->max_lock);
3821 	for_each_tracing_cpu(cpu) {
3822 		/*
3823 		 * Increase/decrease the disabled counter if we are
3824 		 * about to flip a bit in the cpumask:
3825 		 */
3826 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3827 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3828 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3829 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3830 		}
3831 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3832 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3833 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3834 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3835 		}
3836 	}
3837 	arch_spin_unlock(&tr->max_lock);
3838 	local_irq_enable();
3839 
3840 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3841 	free_cpumask_var(tracing_cpumask_new);
3842 
3843 	return count;
3844 
3845 err_unlock:
3846 	free_cpumask_var(tracing_cpumask_new);
3847 
3848 	return err;
3849 }
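
/*
 * Usage sketch for the tracing_cpumask file handled above (the mask is
 * parsed by cpumask_parse_user(), so it is written as a hex CPU mask;
 * the value below is only an example):
 *
 *   # echo 3 > tracing_cpumask		# trace only CPUs 0 and 1
 *   # cat tracing_cpumask
 *
 * CPUs being removed from the mask get their per-CPU 'disabled' count
 * bumped and ring-buffer recording stopped; CPUs being added get the
 * reverse, and the new mask is then copied into tr->tracing_cpumask.
 */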
3850 
3851 static const struct file_operations tracing_cpumask_fops = {
3852 	.open		= tracing_open_generic_tr,
3853 	.read		= tracing_cpumask_read,
3854 	.write		= tracing_cpumask_write,
3855 	.release	= tracing_release_generic_tr,
3856 	.llseek		= generic_file_llseek,
3857 };
3858 
3859 static int tracing_trace_options_show(struct seq_file *m, void *v)
3860 {
3861 	struct tracer_opt *trace_opts;
3862 	struct trace_array *tr = m->private;
3863 	u32 tracer_flags;
3864 	int i;
3865 
3866 	mutex_lock(&trace_types_lock);
3867 	tracer_flags = tr->current_trace->flags->val;
3868 	trace_opts = tr->current_trace->flags->opts;
3869 
3870 	for (i = 0; trace_options[i]; i++) {
3871 		if (tr->trace_flags & (1 << i))
3872 			seq_printf(m, "%s\n", trace_options[i]);
3873 		else
3874 			seq_printf(m, "no%s\n", trace_options[i]);
3875 	}
3876 
3877 	for (i = 0; trace_opts[i].name; i++) {
3878 		if (tracer_flags & trace_opts[i].bit)
3879 			seq_printf(m, "%s\n", trace_opts[i].name);
3880 		else
3881 			seq_printf(m, "no%s\n", trace_opts[i].name);
3882 	}
3883 	mutex_unlock(&trace_types_lock);
3884 
3885 	return 0;
3886 }
3887 
3888 static int __set_tracer_option(struct trace_array *tr,
3889 			       struct tracer_flags *tracer_flags,
3890 			       struct tracer_opt *opts, int neg)
3891 {
3892 	struct tracer *trace = tracer_flags->trace;
3893 	int ret;
3894 
3895 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3896 	if (ret)
3897 		return ret;
3898 
3899 	if (neg)
3900 		tracer_flags->val &= ~opts->bit;
3901 	else
3902 		tracer_flags->val |= opts->bit;
3903 	return 0;
3904 }
3905 
3906 /* Try to assign a tracer specific option */
3907 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3908 {
3909 	struct tracer *trace = tr->current_trace;
3910 	struct tracer_flags *tracer_flags = trace->flags;
3911 	struct tracer_opt *opts = NULL;
3912 	int i;
3913 
3914 	for (i = 0; tracer_flags->opts[i].name; i++) {
3915 		opts = &tracer_flags->opts[i];
3916 
3917 		if (strcmp(cmp, opts->name) == 0)
3918 			return __set_tracer_option(tr, trace->flags, opts, neg);
3919 	}
3920 
3921 	return -EINVAL;
3922 }
3923 
3924 /* Some tracers require overwrite to stay enabled */
3925 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3926 {
3927 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3928 		return -1;
3929 
3930 	return 0;
3931 }
3932 
3933 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3934 {
3935 	/* do nothing if flag is already set */
3936 	if (!!(tr->trace_flags & mask) == !!enabled)
3937 		return 0;
3938 
3939 	/* Give the tracer a chance to approve the change */
3940 	if (tr->current_trace->flag_changed)
3941 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3942 			return -EINVAL;
3943 
3944 	if (enabled)
3945 		tr->trace_flags |= mask;
3946 	else
3947 		tr->trace_flags &= ~mask;
3948 
3949 	if (mask == TRACE_ITER_RECORD_CMD)
3950 		trace_event_enable_cmd_record(enabled);
3951 
3952 	if (mask == TRACE_ITER_EVENT_FORK)
3953 		trace_event_follow_fork(tr, enabled);
3954 
3955 	if (mask == TRACE_ITER_OVERWRITE) {
3956 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3957 #ifdef CONFIG_TRACER_MAX_TRACE
3958 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3959 #endif
3960 	}
3961 
3962 	if (mask == TRACE_ITER_PRINTK) {
3963 		trace_printk_start_stop_comm(enabled);
3964 		trace_printk_control(enabled);
3965 	}
3966 
3967 	return 0;
3968 }
3969 
3970 static int trace_set_options(struct trace_array *tr, char *option)
3971 {
3972 	char *cmp;
3973 	int neg = 0;
3974 	int ret = -ENODEV;
3975 	int i;
3976 	size_t orig_len = strlen(option);
3977 
3978 	cmp = strstrip(option);
3979 
3980 	if (strncmp(cmp, "no", 2) == 0) {
3981 		neg = 1;
3982 		cmp += 2;
3983 	}
3984 
3985 	mutex_lock(&trace_types_lock);
3986 
3987 	for (i = 0; trace_options[i]; i++) {
3988 		if (strcmp(cmp, trace_options[i]) == 0) {
3989 			ret = set_tracer_flag(tr, 1 << i, !neg);
3990 			break;
3991 		}
3992 	}
3993 
3994 	/* If no option could be set, test the specific tracer options */
3995 	if (!trace_options[i])
3996 		ret = set_tracer_option(tr, cmp, neg);
3997 
3998 	mutex_unlock(&trace_types_lock);
3999 
4000 	/*
4001 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4002 	 * turn it back into a space.
4003 	 */
4004 	if (orig_len > strlen(option))
4005 		option[strlen(option)] = ' ';
4006 
4007 	return ret;
4008 }
4009 
4010 static void __init apply_trace_boot_options(void)
4011 {
4012 	char *buf = trace_boot_options_buf;
4013 	char *option;
4014 
4015 	while (true) {
4016 		option = strsep(&buf, ",");
4017 
4018 		if (!option)
4019 			break;
4020 
4021 		if (*option)
4022 			trace_set_options(&global_trace, option);
4023 
4024 		/* Put back the comma to allow this to be called again */
4025 		if (buf)
4026 			*(buf - 1) = ',';
4027 	}
4028 }
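
/*
 * Example of the loop above: booting with trace_options=sym-addr,noirq-info
 * (option names purely illustrative) passes "sym-addr" and then "noirq-info"
 * to trace_set_options(), which strips a leading "no" itself; the ',' that
 * strsep() zeroed is then restored so trace_boot_options_buf stays intact
 * for any later pass.
 */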
4029 
4030 static ssize_t
4031 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4032 			size_t cnt, loff_t *ppos)
4033 {
4034 	struct seq_file *m = filp->private_data;
4035 	struct trace_array *tr = m->private;
4036 	char buf[64];
4037 	int ret;
4038 
4039 	if (cnt >= sizeof(buf))
4040 		return -EINVAL;
4041 
4042 	if (copy_from_user(buf, ubuf, cnt))
4043 		return -EFAULT;
4044 
4045 	buf[cnt] = 0;
4046 
4047 	ret = trace_set_options(tr, buf);
4048 	if (ret < 0)
4049 		return ret;
4050 
4051 	*ppos += cnt;
4052 
4053 	return cnt;
4054 }
4055 
4056 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4057 {
4058 	struct trace_array *tr = inode->i_private;
4059 	int ret;
4060 
4061 	if (tracing_disabled)
4062 		return -ENODEV;
4063 
4064 	if (trace_array_get(tr) < 0)
4065 		return -ENODEV;
4066 
4067 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4068 	if (ret < 0)
4069 		trace_array_put(tr);
4070 
4071 	return ret;
4072 }
4073 
4074 static const struct file_operations tracing_iter_fops = {
4075 	.open		= tracing_trace_options_open,
4076 	.read		= seq_read,
4077 	.llseek		= seq_lseek,
4078 	.release	= tracing_single_release_tr,
4079 	.write		= tracing_trace_options_write,
4080 };
4081 
4082 static const char readme_msg[] =
4083 	"tracing mini-HOWTO:\n\n"
4084 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4085 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4086 	" Important files:\n"
4087 	"  trace\t\t\t- The static contents of the buffer\n"
4088 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4089 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4090 	"  current_tracer\t- function and latency tracers\n"
4091 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4092 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4093 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4094 	"  trace_clock\t\t- change the clock used to order events\n"
4095 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4096 	"      global:   Synced across CPUs but slows tracing down.\n"
4097 	"     counter:   Not a clock, but just an increment\n"
4098 	"      uptime:   Jiffy counter from time of boot\n"
4099 	"        perf:   Same clock that perf events use\n"
4100 #ifdef CONFIG_X86_64
4101 	"     x86-tsc:   TSC cycle counter\n"
4102 #endif
4103 	"\n  trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
4104 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4105 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4106 	"\t\t\t  Remove sub-buffer with rmdir\n"
4107 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4108 	"\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4109 	"\t\t\t  option name\n"
4110 	"  saved_cmdlines_size\t- echo the number of commands to store in the comm-pid list\n"
4111 #ifdef CONFIG_DYNAMIC_FTRACE
4112 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4113 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4114 	"\t\t\t  functions\n"
4115 	"\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4116 	"\t     modules: Can select a group via module\n"
4117 	"\t      Format: :mod:<module-name>\n"
4118 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4119 	"\t    triggers: a command to perform when function is hit\n"
4120 	"\t      Format: <function>:<trigger>[:count]\n"
4121 	"\t     trigger: traceon, traceoff\n"
4122 	"\t\t      enable_event:<system>:<event>\n"
4123 	"\t\t      disable_event:<system>:<event>\n"
4124 #ifdef CONFIG_STACKTRACE
4125 	"\t\t      stacktrace\n"
4126 #endif
4127 #ifdef CONFIG_TRACER_SNAPSHOT
4128 	"\t\t      snapshot\n"
4129 #endif
4130 	"\t\t      dump\n"
4131 	"\t\t      cpudump\n"
4132 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4133 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4134 	"\t     The first one will disable tracing every time do_fault is hit\n"
4135 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4136 	"\t       The first time do_trap is hit and it disables tracing, the\n"
4137 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4138 	"\t       the counter will not decrement. It only decrements when the\n"
4139 	"\t       trigger did work\n"
4140 	"\t     To remove trigger without count:\n"
4141 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4142 	"\t     To remove trigger with a count:\n"
4143 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4144 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4145 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4146 	"\t    modules: Can select a group via module command :mod:\n"
4147 	"\t    Does not accept triggers\n"
4148 #endif /* CONFIG_DYNAMIC_FTRACE */
4149 #ifdef CONFIG_FUNCTION_TRACER
4150 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4151 	"\t\t    (function)\n"
4152 #endif
4153 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4154 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4155 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4156 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4157 #endif
4158 #ifdef CONFIG_TRACER_SNAPSHOT
4159 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4160 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4161 	"\t\t\t  information\n"
4162 #endif
4163 #ifdef CONFIG_STACK_TRACER
4164 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4165 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4166 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4167 	"\t\t\t  new trace)\n"
4168 #ifdef CONFIG_DYNAMIC_FTRACE
4169 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4170 	"\t\t\t  traces\n"
4171 #endif
4172 #endif /* CONFIG_STACK_TRACER */
4173 #ifdef CONFIG_KPROBE_EVENT
4174 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4175 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4176 #endif
4177 #ifdef CONFIG_UPROBE_EVENT
4178 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4179 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4180 #endif
4181 #if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
4182 	"\t  accepts: event-definitions (one definition per line)\n"
4183 	"\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4184 	"\t           -:[<group>/]<event>\n"
4185 #ifdef CONFIG_KPROBE_EVENT
4186 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4187 #endif
4188 #ifdef CONFIG_UPROBE_EVENT
4189 	"\t    place: <path>:<offset>\n"
4190 #endif
4191 	"\t     args: <name>=fetcharg[:type]\n"
4192 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4193 	"\t           $stack<index>, $stack, $retval, $comm\n"
4194 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4195 	"\t           b<bit-width>@<bit-offset>/<container-size>\n"
4196 #endif
4197 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4198 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4199 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4200 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4201 	"\t\t\t  events\n"
4202 	"      filter\t\t- If set, only events passing filter are traced\n"
4203 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4204 	"\t\t\t  <event>:\n"
4205 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4206 	"      filter\t\t- If set, only events passing filter are traced\n"
4207 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4208 	"\t    Format: <trigger>[:count][if <filter>]\n"
4209 	"\t   trigger: traceon, traceoff\n"
4210 	"\t            enable_event:<system>:<event>\n"
4211 	"\t            disable_event:<system>:<event>\n"
4212 #ifdef CONFIG_HIST_TRIGGERS
4213 	"\t            enable_hist:<system>:<event>\n"
4214 	"\t            disable_hist:<system>:<event>\n"
4215 #endif
4216 #ifdef CONFIG_STACKTRACE
4217 	"\t\t    stacktrace\n"
4218 #endif
4219 #ifdef CONFIG_TRACER_SNAPSHOT
4220 	"\t\t    snapshot\n"
4221 #endif
4222 #ifdef CONFIG_HIST_TRIGGERS
4223 	"\t\t    hist (see below)\n"
4224 #endif
4225 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4226 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4227 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4228 	"\t                  events/block/block_unplug/trigger\n"
4229 	"\t   The first disables tracing every time block_unplug is hit.\n"
4230 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4231 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4232 	"\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4233 	"\t   Like function triggers, the counter is only decremented if it\n"
4234 	"\t    enabled or disabled tracing.\n"
4235 	"\t   To remove a trigger without a count:\n"
4236 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4237 	"\t   To remove a trigger with a count:\n"
4238 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4239 	"\t   Filters can be ignored when removing a trigger.\n"
4240 #ifdef CONFIG_HIST_TRIGGERS
4241 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4242 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4243 	"\t            [:values=<field1[,field2,...]>]\n"
4244 	"\t            [:sort=<field1[,field2,...]>]\n"
4245 	"\t            [:size=#entries]\n"
4246 	"\t            [:pause][:continue][:clear]\n"
4247 	"\t            [:name=histname1]\n"
4248 	"\t            [if <filter>]\n\n"
4249 	"\t    When a matching event is hit, an entry is added to a hash\n"
4250 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4251 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4252 	"\t    correspond to fields in the event's format description.  Keys\n"
4253 	"\t    can be any field, or the special string 'stacktrace'.\n"
4254 	"\t    Compound keys consisting of up to two fields can be specified\n"
4255 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4256 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4257 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4258 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4259 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4260 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4261 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4262 	"\t    its histogram data will be shared with other triggers of the\n"
4263 	"\t    same name, and trigger hits will update this common data.\n\n"
4264 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4265 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4266 	"\t    triggers attached to an event, there will be a table for each\n"
4267 	"\t    trigger in the output.  The table displayed for a named\n"
4268 	"\t    trigger will be the same as any other instance having the\n"
4269 	"\t    same name.  The default format used to display a given field\n"
4270 	"\t    can be modified by appending any of the following modifiers\n"
4271 	"\t    to the field name, as applicable:\n\n"
4272 	"\t            .hex        display a number as a hex value\n"
4273 	"\t            .sym        display an address as a symbol\n"
4274 	"\t            .sym-offset display an address as a symbol and offset\n"
4275 	"\t            .execname   display a common_pid as a program name\n"
4276 	"\t            .syscall    display a syscall id as a syscall name\n"
4277 	"\t            .log2       display log2 value rather than raw number\n\n"
4278 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4279 	"\t    trigger or to start a hist trigger but not log any events\n"
4280 	"\t    until told to do so.  'continue' can be used to start or\n"
4281 	"\t    restart a paused hist trigger.\n\n"
4282 	"\t    The 'clear' parameter will clear the contents of a running\n"
4283 	"\t    hist trigger and leave its current paused/active state\n"
4284 	"\t    unchanged.\n\n"
4285 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4286 	"\t    have one event conditionally start and stop another event's\n"
4287 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4288 	"\t    the enable_event and disable_event triggers.\n"
4289 #endif
4290 ;
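
/*
 * A concrete instance of the hist trigger syntax documented above (the
 * kmem:kmalloc event and its call_site/bytes_req fields are used purely
 * for illustration), run from the tracefs directory:
 *
 *   # echo 'hist:keys=call_site.sym:values=bytes_req:sort=bytes_req.descending' \
 *		> events/kmem/kmalloc/trigger
 *   # cat events/kmem/kmalloc/hist
 */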
4291 
4292 static ssize_t
4293 tracing_readme_read(struct file *filp, char __user *ubuf,
4294 		       size_t cnt, loff_t *ppos)
4295 {
4296 	return simple_read_from_buffer(ubuf, cnt, ppos,
4297 					readme_msg, strlen(readme_msg));
4298 }
4299 
4300 static const struct file_operations tracing_readme_fops = {
4301 	.open		= tracing_open_generic,
4302 	.read		= tracing_readme_read,
4303 	.llseek		= generic_file_llseek,
4304 };
4305 
4306 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4307 {
4308 	unsigned int *ptr = v;
4309 
4310 	if (*pos || m->count)
4311 		ptr++;
4312 
4313 	(*pos)++;
4314 
4315 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4316 	     ptr++) {
4317 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4318 			continue;
4319 
4320 		return ptr;
4321 	}
4322 
4323 	return NULL;
4324 }
4325 
4326 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4327 {
4328 	void *v;
4329 	loff_t l = 0;
4330 
4331 	preempt_disable();
4332 	arch_spin_lock(&trace_cmdline_lock);
4333 
4334 	v = &savedcmd->map_cmdline_to_pid[0];
4335 	while (l <= *pos) {
4336 		v = saved_cmdlines_next(m, v, &l);
4337 		if (!v)
4338 			return NULL;
4339 	}
4340 
4341 	return v;
4342 }
4343 
4344 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4345 {
4346 	arch_spin_unlock(&trace_cmdline_lock);
4347 	preempt_enable();
4348 }
4349 
4350 static int saved_cmdlines_show(struct seq_file *m, void *v)
4351 {
4352 	char buf[TASK_COMM_LEN];
4353 	unsigned int *pid = v;
4354 
4355 	__trace_find_cmdline(*pid, buf);
4356 	seq_printf(m, "%d %s\n", *pid, buf);
4357 	return 0;
4358 }
4359 
4360 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4361 	.start		= saved_cmdlines_start,
4362 	.next		= saved_cmdlines_next,
4363 	.stop		= saved_cmdlines_stop,
4364 	.show		= saved_cmdlines_show,
4365 };
4366 
4367 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4368 {
4369 	if (tracing_disabled)
4370 		return -ENODEV;
4371 
4372 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4373 }
4374 
4375 static const struct file_operations tracing_saved_cmdlines_fops = {
4376 	.open		= tracing_saved_cmdlines_open,
4377 	.read		= seq_read,
4378 	.llseek		= seq_lseek,
4379 	.release	= seq_release,
4380 };
4381 
4382 static ssize_t
4383 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4384 				 size_t cnt, loff_t *ppos)
4385 {
4386 	char buf[64];
4387 	int r;
4388 
4389 	arch_spin_lock(&trace_cmdline_lock);
4390 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4391 	arch_spin_unlock(&trace_cmdline_lock);
4392 
4393 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4394 }
4395 
4396 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4397 {
4398 	kfree(s->saved_cmdlines);
4399 	kfree(s->map_cmdline_to_pid);
4400 	kfree(s);
4401 }
4402 
4403 static int tracing_resize_saved_cmdlines(unsigned int val)
4404 {
4405 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
4406 
4407 	s = kmalloc(sizeof(*s), GFP_KERNEL);
4408 	if (!s)
4409 		return -ENOMEM;
4410 
4411 	if (allocate_cmdlines_buffer(val, s) < 0) {
4412 		kfree(s);
4413 		return -ENOMEM;
4414 	}
4415 
4416 	arch_spin_lock(&trace_cmdline_lock);
4417 	savedcmd_temp = savedcmd;
4418 	savedcmd = s;
4419 	arch_spin_unlock(&trace_cmdline_lock);
4420 	free_saved_cmdlines_buffer(savedcmd_temp);
4421 
4422 	return 0;
4423 }
4424 
4425 static ssize_t
4426 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4427 				  size_t cnt, loff_t *ppos)
4428 {
4429 	unsigned long val;
4430 	int ret;
4431 
4432 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4433 	if (ret)
4434 		return ret;
4435 
4436 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4437 	if (!val || val > PID_MAX_DEFAULT)
4438 		return -EINVAL;
4439 
4440 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
4441 	if (ret < 0)
4442 		return ret;
4443 
4444 	*ppos += cnt;
4445 
4446 	return cnt;
4447 }
4448 
4449 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4450 	.open		= tracing_open_generic,
4451 	.read		= tracing_saved_cmdlines_size_read,
4452 	.write		= tracing_saved_cmdlines_size_write,
4453 };
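
/*
 * Typical use of the saved_cmdlines_size file wired up above: a read
 * reports how many comm entries are currently cached, and writing a new
 * count (1 .. PID_MAX_DEFAULT) reallocates the cmdline cache, e.g.:
 *
 *   # cat saved_cmdlines_size
 *   # echo 8192 > saved_cmdlines_size
 */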
4454 
4455 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4456 static union trace_enum_map_item *
4457 update_enum_map(union trace_enum_map_item *ptr)
4458 {
4459 	if (!ptr->map.enum_string) {
4460 		if (ptr->tail.next) {
4461 			ptr = ptr->tail.next;
4462 			/* Set ptr to the next real item (skip head) */
4463 			ptr++;
4464 		} else
4465 			return NULL;
4466 	}
4467 	return ptr;
4468 }
4469 
4470 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4471 {
4472 	union trace_enum_map_item *ptr = v;
4473 
4474 	/*
4475 	 * Paranoid! If ptr points to end, we don't want to increment past it.
4476 	 * This really should never happen.
4477 	 */
4478 	ptr = update_enum_map(ptr);
4479 	if (WARN_ON_ONCE(!ptr))
4480 		return NULL;
4481 
4482 	ptr++;
4483 
4484 	(*pos)++;
4485 
4486 	ptr = update_enum_map(ptr);
4487 
4488 	return ptr;
4489 }
4490 
4491 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4492 {
4493 	union trace_enum_map_item *v;
4494 	loff_t l = 0;
4495 
4496 	mutex_lock(&trace_enum_mutex);
4497 
4498 	v = trace_enum_maps;
4499 	if (v)
4500 		v++;
4501 
4502 	while (v && l < *pos) {
4503 		v = enum_map_next(m, v, &l);
4504 	}
4505 
4506 	return v;
4507 }
4508 
4509 static void enum_map_stop(struct seq_file *m, void *v)
4510 {
4511 	mutex_unlock(&trace_enum_mutex);
4512 }
4513 
4514 static int enum_map_show(struct seq_file *m, void *v)
4515 {
4516 	union trace_enum_map_item *ptr = v;
4517 
4518 	seq_printf(m, "%s %ld (%s)\n",
4519 		   ptr->map.enum_string, ptr->map.enum_value,
4520 		   ptr->map.system);
4521 
4522 	return 0;
4523 }
4524 
4525 static const struct seq_operations tracing_enum_map_seq_ops = {
4526 	.start		= enum_map_start,
4527 	.next		= enum_map_next,
4528 	.stop		= enum_map_stop,
4529 	.show		= enum_map_show,
4530 };
4531 
4532 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4533 {
4534 	if (tracing_disabled)
4535 		return -ENODEV;
4536 
4537 	return seq_open(filp, &tracing_enum_map_seq_ops);
4538 }
4539 
4540 static const struct file_operations tracing_enum_map_fops = {
4541 	.open		= tracing_enum_map_open,
4542 	.read		= seq_read,
4543 	.llseek		= seq_lseek,
4544 	.release	= seq_release,
4545 };
4546 
4547 static inline union trace_enum_map_item *
4548 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4549 {
4550 	/* Return tail of array given the head */
4551 	return ptr + ptr->head.length + 1;
4552 }
4553 
4554 static void
4555 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4556 			   int len)
4557 {
4558 	struct trace_enum_map **stop;
4559 	struct trace_enum_map **map;
4560 	union trace_enum_map_item *map_array;
4561 	union trace_enum_map_item *ptr;
4562 
4563 	stop = start + len;
4564 
4565 	/*
4566 	 * The trace_enum_maps contains the map plus a head and tail item,
4567 	 * where the head holds the module and length of array, and the
4568 	 * tail holds a pointer to the next list.
4569 	 */
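	/*
	 * Rough layout for len == 3, as built below:
	 *
	 *   map_array: [ head | map 0 | map 1 | map 2 | tail ]
	 *
	 * head.mod and head.length describe this block, the map slots are
	 * copied from *start..*stop, and the zeroed element written at the
	 * end acts as the tail: its NULL tail.next terminates the list until
	 * another block is chained on.
	 */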
4570 	map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4571 	if (!map_array) {
4572 		pr_warn("Unable to allocate trace enum mapping\n");
4573 		return;
4574 	}
4575 
4576 	mutex_lock(&trace_enum_mutex);
4577 
4578 	if (!trace_enum_maps)
4579 		trace_enum_maps = map_array;
4580 	else {
4581 		ptr = trace_enum_maps;
4582 		for (;;) {
4583 			ptr = trace_enum_jmp_to_tail(ptr);
4584 			if (!ptr->tail.next)
4585 				break;
4586 			ptr = ptr->tail.next;
4587 
4588 		}
4589 		ptr->tail.next = map_array;
4590 	}
4591 	map_array->head.mod = mod;
4592 	map_array->head.length = len;
4593 	map_array++;
4594 
4595 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4596 		map_array->map = **map;
4597 		map_array++;
4598 	}
4599 	memset(map_array, 0, sizeof(*map_array));
4600 
4601 	mutex_unlock(&trace_enum_mutex);
4602 }
4603 
4604 static void trace_create_enum_file(struct dentry *d_tracer)
4605 {
4606 	trace_create_file("enum_map", 0444, d_tracer,
4607 			  NULL, &tracing_enum_map_fops);
4608 }
4609 
4610 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4611 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4612 static inline void trace_insert_enum_map_file(struct module *mod,
4613 			      struct trace_enum_map **start, int len) { }
4614 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4615 
4616 static void trace_insert_enum_map(struct module *mod,
4617 				  struct trace_enum_map **start, int len)
4618 {
4619 	struct trace_enum_map **map;
4620 
4621 	if (len <= 0)
4622 		return;
4623 
4624 	map = start;
4625 
4626 	trace_event_enum_update(map, len);
4627 
4628 	trace_insert_enum_map_file(mod, start, len);
4629 }
4630 
4631 static ssize_t
4632 tracing_saved_tgids_read(struct file *file, char __user *ubuf,
4633 				size_t cnt, loff_t *ppos)
4634 {
4635 	char *file_buf;
4636 	char *buf;
4637 	int len = 0;
4638 	int pid;
4639 	int i;
4640 
4641 	file_buf = kmalloc(SAVED_CMDLINES_DEFAULT*(16+1+16), GFP_KERNEL);
4642 	if (!file_buf)
4643 		return -ENOMEM;
4644 
4645 	buf = file_buf;
4646 
4647 	for (i = 0; i < SAVED_CMDLINES_DEFAULT; i++) {
4648 		int tgid;
4649 		int r;
4650 
4651 		pid = savedcmd->map_cmdline_to_pid[i];
4652 		if (pid == -1 || pid == NO_CMDLINE_MAP)
4653 			continue;
4654 
4655 		tgid = trace_find_tgid(pid);
4656 		r = sprintf(buf, "%d %d\n", pid, tgid);
4657 		buf += r;
4658 		len += r;
4659 	}
4660 
4661 	len = simple_read_from_buffer(ubuf, cnt, ppos,
4662 				      file_buf, len);
4663 
4664 	kfree(file_buf);
4665 
4666 	return len;
4667 }
4668 
4669 static const struct file_operations tracing_saved_tgids_fops = {
4670 	.open	= tracing_open_generic,
4671 	.read	= tracing_saved_tgids_read,
4672 	.llseek	= generic_file_llseek,
4673 };
4674 
4675 static ssize_t
4676 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4677 		       size_t cnt, loff_t *ppos)
4678 {
4679 	struct trace_array *tr = filp->private_data;
4680 	char buf[MAX_TRACER_SIZE+2];
4681 	int r;
4682 
4683 	mutex_lock(&trace_types_lock);
4684 	r = sprintf(buf, "%s\n", tr->current_trace->name);
4685 	mutex_unlock(&trace_types_lock);
4686 
4687 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4688 }
4689 
4690 int tracer_init(struct tracer *t, struct trace_array *tr)
4691 {
4692 	tracing_reset_online_cpus(&tr->trace_buffer);
4693 	return t->init(tr);
4694 }
4695 
4696 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4697 {
4698 	int cpu;
4699 
4700 	for_each_tracing_cpu(cpu)
4701 		per_cpu_ptr(buf->data, cpu)->entries = val;
4702 }
4703 
4704 #ifdef CONFIG_TRACER_MAX_TRACE
4705 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4706 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4707 					struct trace_buffer *size_buf, int cpu_id)
4708 {
4709 	int cpu, ret = 0;
4710 
4711 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
4712 		for_each_tracing_cpu(cpu) {
4713 			ret = ring_buffer_resize(trace_buf->buffer,
4714 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4715 			if (ret < 0)
4716 				break;
4717 			per_cpu_ptr(trace_buf->data, cpu)->entries =
4718 				per_cpu_ptr(size_buf->data, cpu)->entries;
4719 		}
4720 	} else {
4721 		ret = ring_buffer_resize(trace_buf->buffer,
4722 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4723 		if (ret == 0)
4724 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4725 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
4726 	}
4727 
4728 	return ret;
4729 }
4730 #endif /* CONFIG_TRACER_MAX_TRACE */
4731 
4732 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4733 					unsigned long size, int cpu)
4734 {
4735 	int ret;
4736 
4737 	/*
4738 	 * If kernel or user changes the size of the ring buffer
4739 	 * we use the size that was given, and we can forget about
4740 	 * expanding it later.
4741 	 */
4742 	ring_buffer_expanded = true;
4743 
4744 	/* May be called before buffers are initialized */
4745 	if (!tr->trace_buffer.buffer)
4746 		return 0;
4747 
4748 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4749 	if (ret < 0)
4750 		return ret;
4751 
4752 #ifdef CONFIG_TRACER_MAX_TRACE
4753 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4754 	    !tr->current_trace->use_max_tr)
4755 		goto out;
4756 
4757 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4758 	if (ret < 0) {
4759 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4760 						     &tr->trace_buffer, cpu);
4761 		if (r < 0) {
4762 			/*
4763 			 * AARGH! We are left with different
4764 			 * size max buffer!!!!
4765 			 * The max buffer is our "snapshot" buffer.
4766 			 * When a tracer needs a snapshot (one of the
4767 			 * latency tracers), it swaps the max buffer
4768 		 * with the saved snapshot. We succeeded in updating
4769 		 * the size of the main buffer, but failed to update
4770 		 * the size of the max buffer. But when we tried
4771 			 * to reset the main buffer to the original size, we
4772 			 * failed there too. This is very unlikely to
4773 			 * happen, but if it does, warn and kill all
4774 			 * tracing.
4775 			 */
4776 			WARN_ON(1);
4777 			tracing_disabled = 1;
4778 		}
4779 		return ret;
4780 	}
4781 
4782 	if (cpu == RING_BUFFER_ALL_CPUS)
4783 		set_buffer_entries(&tr->max_buffer, size);
4784 	else
4785 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4786 
4787  out:
4788 #endif /* CONFIG_TRACER_MAX_TRACE */
4789 
4790 	if (cpu == RING_BUFFER_ALL_CPUS)
4791 		set_buffer_entries(&tr->trace_buffer, size);
4792 	else
4793 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4794 
4795 	return ret;
4796 }
4797 
4798 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4799 					  unsigned long size, int cpu_id)
4800 {
4801 	int ret = size;
4802 
4803 	mutex_lock(&trace_types_lock);
4804 
4805 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
4806 		/* make sure this cpu is enabled in the mask */
4807 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4808 			ret = -EINVAL;
4809 			goto out;
4810 		}
4811 	}
4812 
4813 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4814 	if (ret < 0)
4815 		ret = -ENOMEM;
4816 
4817 out:
4818 	mutex_unlock(&trace_types_lock);
4819 
4820 	return ret;
4821 }
4822 
4823 
4824 /**
4825  * tracing_update_buffers - used by tracing facility to expand ring buffers
4826  *
4827  * To save memory when tracing is never used on a system that has it
4828  * configured in, the ring buffers are set to a minimum size. But once
4829  * a user starts to use the tracing facility, they need to grow
4830  * to their default size.
4831  *
4832  * This function is to be called when a tracer is about to be used.
4833  */
4834 int tracing_update_buffers(void)
4835 {
4836 	int ret = 0;
4837 
4838 	mutex_lock(&trace_types_lock);
4839 	if (!ring_buffer_expanded)
4840 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4841 						RING_BUFFER_ALL_CPUS);
4842 	mutex_unlock(&trace_types_lock);
4843 
4844 	return ret;
4845 }
4846 
4847 struct trace_option_dentry;
4848 
4849 static void
4850 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4851 
4852 /*
4853  * Used to clear out the tracer before deletion of an instance.
4854  * Must have trace_types_lock held.
4855  */
4856 static void tracing_set_nop(struct trace_array *tr)
4857 {
4858 	if (tr->current_trace == &nop_trace)
4859 		return;
4860 
4861 	tr->current_trace->enabled--;
4862 
4863 	if (tr->current_trace->reset)
4864 		tr->current_trace->reset(tr);
4865 
4866 	tr->current_trace = &nop_trace;
4867 }
4868 
4869 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4870 {
4871 	/* Only enable if the directory has been created already. */
4872 	if (!tr->dir)
4873 		return;
4874 
4875 	create_trace_option_files(tr, t);
4876 }
4877 
4878 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4879 {
4880 	struct tracer *t;
4881 #ifdef CONFIG_TRACER_MAX_TRACE
4882 	bool had_max_tr;
4883 #endif
4884 	int ret = 0;
4885 
4886 	mutex_lock(&trace_types_lock);
4887 
4888 	if (!ring_buffer_expanded) {
4889 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4890 						RING_BUFFER_ALL_CPUS);
4891 		if (ret < 0)
4892 			goto out;
4893 		ret = 0;
4894 	}
4895 
4896 	for (t = trace_types; t; t = t->next) {
4897 		if (strcmp(t->name, buf) == 0)
4898 			break;
4899 	}
4900 	if (!t) {
4901 		ret = -EINVAL;
4902 		goto out;
4903 	}
4904 	if (t == tr->current_trace)
4905 		goto out;
4906 
4907 	/* Some tracers are only allowed for the top level buffer */
4908 	if (!trace_ok_for_array(t, tr)) {
4909 		ret = -EINVAL;
4910 		goto out;
4911 	}
4912 
4913 	/* If trace pipe files are being read, we can't change the tracer */
4914 	if (tr->current_trace->ref) {
4915 		ret = -EBUSY;
4916 		goto out;
4917 	}
4918 
4919 	trace_branch_disable();
4920 
4921 	tr->current_trace->enabled--;
4922 
4923 	if (tr->current_trace->reset)
4924 		tr->current_trace->reset(tr);
4925 
4926 	/* Current trace needs to be nop_trace before synchronize_sched */
4927 	tr->current_trace = &nop_trace;
4928 
4929 #ifdef CONFIG_TRACER_MAX_TRACE
4930 	had_max_tr = tr->allocated_snapshot;
4931 
4932 	if (had_max_tr && !t->use_max_tr) {
4933 		/*
4934 		 * We need to make sure that the update_max_tr sees that
4935 		 * current_trace changed to nop_trace to keep it from
4936 		 * swapping the buffers after we resize it.
4937 		 * update_max_tr() is called with interrupts disabled,
4938 		 * so a synchronize_sched() is sufficient.
4939 		 */
4940 		synchronize_sched();
4941 		free_snapshot(tr);
4942 	}
4943 #endif
4944 
4945 #ifdef CONFIG_TRACER_MAX_TRACE
4946 	if (t->use_max_tr && !had_max_tr) {
4947 		ret = alloc_snapshot(tr);
4948 		if (ret < 0)
4949 			goto out;
4950 	}
4951 #endif
4952 
4953 	if (t->init) {
4954 		ret = tracer_init(t, tr);
4955 		if (ret)
4956 			goto out;
4957 	}
4958 
4959 	tr->current_trace = t;
4960 	tr->current_trace->enabled++;
4961 	trace_branch_enable(tr);
4962  out:
4963 	mutex_unlock(&trace_types_lock);
4964 
4965 	return ret;
4966 }
4967 
4968 static ssize_t
4969 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4970 			size_t cnt, loff_t *ppos)
4971 {
4972 	struct trace_array *tr = filp->private_data;
4973 	char buf[MAX_TRACER_SIZE+1];
4974 	int i;
4975 	size_t ret;
4976 	int err;
4977 
4978 	ret = cnt;
4979 
4980 	if (cnt > MAX_TRACER_SIZE)
4981 		cnt = MAX_TRACER_SIZE;
4982 
4983 	if (copy_from_user(buf, ubuf, cnt))
4984 		return -EFAULT;
4985 
4986 	buf[cnt] = 0;
4987 
4988 	/* strip ending whitespace. */
4989 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4990 		buf[i] = 0;
4991 
4992 	err = tracing_set_tracer(tr, buf);
4993 	if (err)
4994 		return err;
4995 
4996 	*ppos += ret;
4997 
4998 	return ret;
4999 }
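
/*
 * The write handler above backs the current_tracer file; a typical round
 * trip from the shell (available tracer names depend on the kernel config):
 *
 *   # cat available_tracers
 *   # echo function > current_tracer
 *   # echo nop > current_tracer
 *
 * The trailing newline that echo appends is stripped before
 * tracing_set_tracer() looks the name up in trace_types.
 */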
5000 
5001 static ssize_t
5002 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5003 		   size_t cnt, loff_t *ppos)
5004 {
5005 	char buf[64];
5006 	int r;
5007 
5008 	r = snprintf(buf, sizeof(buf), "%ld\n",
5009 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5010 	if (r > sizeof(buf))
5011 		r = sizeof(buf);
5012 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5013 }
5014 
5015 static ssize_t
5016 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5017 		    size_t cnt, loff_t *ppos)
5018 {
5019 	unsigned long val;
5020 	int ret;
5021 
5022 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5023 	if (ret)
5024 		return ret;
5025 
5026 	*ptr = val * 1000;
5027 
5028 	return cnt;
5029 }
5030 
5031 static ssize_t
5032 tracing_thresh_read(struct file *filp, char __user *ubuf,
5033 		    size_t cnt, loff_t *ppos)
5034 {
5035 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5036 }
5037 
5038 static ssize_t
5039 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5040 		     size_t cnt, loff_t *ppos)
5041 {
5042 	struct trace_array *tr = filp->private_data;
5043 	int ret;
5044 
5045 	mutex_lock(&trace_types_lock);
5046 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5047 	if (ret < 0)
5048 		goto out;
5049 
5050 	if (tr->current_trace->update_thresh) {
5051 		ret = tr->current_trace->update_thresh(tr);
5052 		if (ret < 0)
5053 			goto out;
5054 	}
5055 
5056 	ret = cnt;
5057 out:
5058 	mutex_unlock(&trace_types_lock);
5059 
5060 	return ret;
5061 }
5062 
5063 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5064 
5065 static ssize_t
5066 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5067 		     size_t cnt, loff_t *ppos)
5068 {
5069 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5070 }
5071 
5072 static ssize_t
5073 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5074 		      size_t cnt, loff_t *ppos)
5075 {
5076 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5077 }
5078 
5079 #endif
5080 
5081 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5082 {
5083 	struct trace_array *tr = inode->i_private;
5084 	struct trace_iterator *iter;
5085 	int ret = 0;
5086 
5087 	if (tracing_disabled)
5088 		return -ENODEV;
5089 
5090 	if (trace_array_get(tr) < 0)
5091 		return -ENODEV;
5092 
5093 	mutex_lock(&trace_types_lock);
5094 
5095 	/* create a buffer to store the information to pass to userspace */
5096 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5097 	if (!iter) {
5098 		ret = -ENOMEM;
5099 		__trace_array_put(tr);
5100 		goto out;
5101 	}
5102 
5103 	trace_seq_init(&iter->seq);
5104 	iter->trace = tr->current_trace;
5105 
5106 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5107 		ret = -ENOMEM;
5108 		goto fail;
5109 	}
5110 
5111 	/* trace pipe does not show start of buffer */
5112 	cpumask_setall(iter->started);
5113 
5114 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5115 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5116 
5117 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5118 	if (trace_clocks[tr->clock_id].in_ns)
5119 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5120 
5121 	iter->tr = tr;
5122 	iter->trace_buffer = &tr->trace_buffer;
5123 	iter->cpu_file = tracing_get_cpu(inode);
5124 	mutex_init(&iter->mutex);
5125 	filp->private_data = iter;
5126 
5127 	if (iter->trace->pipe_open)
5128 		iter->trace->pipe_open(iter);
5129 
5130 	nonseekable_open(inode, filp);
5131 
5132 	tr->current_trace->ref++;
5133 out:
5134 	mutex_unlock(&trace_types_lock);
5135 	return ret;
5136 
5137 fail:
5138 	kfree(iter->trace);
5139 	kfree(iter);
5140 	__trace_array_put(tr);
5141 	mutex_unlock(&trace_types_lock);
5142 	return ret;
5143 }
5144 
5145 static int tracing_release_pipe(struct inode *inode, struct file *file)
5146 {
5147 	struct trace_iterator *iter = file->private_data;
5148 	struct trace_array *tr = inode->i_private;
5149 
5150 	mutex_lock(&trace_types_lock);
5151 
5152 	tr->current_trace->ref--;
5153 
5154 	if (iter->trace->pipe_close)
5155 		iter->trace->pipe_close(iter);
5156 
5157 	mutex_unlock(&trace_types_lock);
5158 
5159 	free_cpumask_var(iter->started);
5160 	mutex_destroy(&iter->mutex);
5161 	kfree(iter);
5162 
5163 	trace_array_put(tr);
5164 
5165 	return 0;
5166 }
5167 
5168 static unsigned int
5169 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5170 {
5171 	struct trace_array *tr = iter->tr;
5172 
5173 	/* Iterators are static, they should be filled or empty */
5174 	if (trace_buffer_iter(iter, iter->cpu_file))
5175 		return POLLIN | POLLRDNORM;
5176 
5177 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5178 		/*
5179 		 * Always select as readable when in blocking mode
5180 		 */
5181 		return POLLIN | POLLRDNORM;
5182 	else
5183 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5184 					     filp, poll_table);
5185 }
5186 
5187 static unsigned int
5188 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5189 {
5190 	struct trace_iterator *iter = filp->private_data;
5191 
5192 	return trace_poll(iter, filp, poll_table);
5193 }
5194 
5195 /* Must be called with iter->mutex held. */
5196 static int tracing_wait_pipe(struct file *filp)
5197 {
5198 	struct trace_iterator *iter = filp->private_data;
5199 	int ret;
5200 
5201 	while (trace_empty(iter)) {
5202 
5203 		if ((filp->f_flags & O_NONBLOCK)) {
5204 			return -EAGAIN;
5205 		}
5206 
5207 		/*
5208 		 * We block until we read something and tracing is disabled.
5209 		 * We still block if tracing is disabled, but we have never
5210 		 * read anything. This allows a user to cat this file, and
5211 		 * then enable tracing. But after we have read something,
5212 		 * we give an EOF when tracing is again disabled.
5213 		 *
5214 		 * iter->pos will be 0 if we haven't read anything.
5215 		 */
5216 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5217 			break;
5218 
5219 		mutex_unlock(&iter->mutex);
5220 
5221 		ret = wait_on_pipe(iter, false);
5222 
5223 		mutex_lock(&iter->mutex);
5224 
5225 		if (ret)
5226 			return ret;
5227 	}
5228 
5229 	return 1;
5230 }
5231 
5232 /*
5233  * Consumer reader.
5234  */
5235 static ssize_t
5236 tracing_read_pipe(struct file *filp, char __user *ubuf,
5237 		  size_t cnt, loff_t *ppos)
5238 {
5239 	struct trace_iterator *iter = filp->private_data;
5240 	ssize_t sret;
5241 
5242 	/*
5243 	 * Avoid more than one consumer on a single file descriptor.
5244 	 * This is only a matter of trace coherency; the ring buffer
5245 	 * itself is protected.
5246 	 */
5247 	mutex_lock(&iter->mutex);
5248 
5249 	/* return any leftover data */
5250 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5251 	if (sret != -EBUSY)
5252 		goto out;
5253 
5254 	trace_seq_init(&iter->seq);
5255 
5256 	if (iter->trace->read) {
5257 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5258 		if (sret)
5259 			goto out;
5260 	}
5261 
5262 waitagain:
5263 	sret = tracing_wait_pipe(filp);
5264 	if (sret <= 0)
5265 		goto out;
5266 
5267 	/* stop when tracing is finished */
5268 	if (trace_empty(iter)) {
5269 		sret = 0;
5270 		goto out;
5271 	}
5272 
5273 	if (cnt >= PAGE_SIZE)
5274 		cnt = PAGE_SIZE - 1;
5275 
5276 	/* reset all but tr, trace, and overruns */
5277 	memset(&iter->seq, 0,
5278 	       sizeof(struct trace_iterator) -
5279 	       offsetof(struct trace_iterator, seq));
5280 	cpumask_clear(iter->started);
5281 	iter->pos = -1;
5282 
5283 	trace_event_read_lock();
5284 	trace_access_lock(iter->cpu_file);
5285 	while (trace_find_next_entry_inc(iter) != NULL) {
5286 		enum print_line_t ret;
5287 		int save_len = iter->seq.seq.len;
5288 
5289 		ret = print_trace_line(iter);
5290 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5291 			/* don't print partial lines */
5292 			iter->seq.seq.len = save_len;
5293 			break;
5294 		}
5295 		if (ret != TRACE_TYPE_NO_CONSUME)
5296 			trace_consume(iter);
5297 
5298 		if (trace_seq_used(&iter->seq) >= cnt)
5299 			break;
5300 
5301 		/*
5302 		 * The full flag being set means we reached the end of the
5303 		 * trace_seq buffer and should have left through the partial
5304 		 * output condition above; a trace_seq_*() function is misused.
5305 		 */
5306 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5307 			  iter->ent->type);
5308 	}
5309 	trace_access_unlock(iter->cpu_file);
5310 	trace_event_read_unlock();
5311 
5312 	/* Now copy what we have to the user */
5313 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5314 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5315 		trace_seq_init(&iter->seq);
5316 
5317 	/*
5318 	 * If there was nothing to send to user, in spite of consuming trace
5319 	 * entries, go back to wait for more entries.
5320 	 */
5321 	if (sret == -EBUSY)
5322 		goto waitagain;
5323 
5324 out:
5325 	mutex_unlock(&iter->mutex);
5326 
5327 	return sret;
5328 }
5329 
5330 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5331 				     unsigned int idx)
5332 {
5333 	__free_page(spd->pages[idx]);
5334 }
5335 
5336 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5337 	.can_merge		= 0,
5338 	.confirm		= generic_pipe_buf_confirm,
5339 	.release		= generic_pipe_buf_release,
5340 	.steal			= generic_pipe_buf_steal,
5341 	.get			= generic_pipe_buf_get,
5342 };
5343 
5344 static size_t
5345 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5346 {
5347 	size_t count;
5348 	int save_len;
5349 	int ret;
5350 
5351 	/* Seq buffer is page-sized, exactly what we need. */
5352 	for (;;) {
5353 		save_len = iter->seq.seq.len;
5354 		ret = print_trace_line(iter);
5355 
5356 		if (trace_seq_has_overflowed(&iter->seq)) {
5357 			iter->seq.seq.len = save_len;
5358 			break;
5359 		}
5360 
5361 		/*
5362 		 * This should not be hit, because it should only
5363 		 * be set if the iter->seq overflowed. But check it
5364 		 * anyway to be safe.
5365 		 */
5366 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5367 			iter->seq.seq.len = save_len;
5368 			break;
5369 		}
5370 
5371 		count = trace_seq_used(&iter->seq) - save_len;
5372 		if (rem < count) {
5373 			rem = 0;
5374 			iter->seq.seq.len = save_len;
5375 			break;
5376 		}
5377 
5378 		if (ret != TRACE_TYPE_NO_CONSUME)
5379 			trace_consume(iter);
5380 		rem -= count;
5381 		if (!trace_find_next_entry_inc(iter))	{
5382 			rem = 0;
5383 			iter->ent = NULL;
5384 			break;
5385 		}
5386 	}
5387 
5388 	return rem;
5389 }
5390 
5391 static ssize_t tracing_splice_read_pipe(struct file *filp,
5392 					loff_t *ppos,
5393 					struct pipe_inode_info *pipe,
5394 					size_t len,
5395 					unsigned int flags)
5396 {
5397 	struct page *pages_def[PIPE_DEF_BUFFERS];
5398 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5399 	struct trace_iterator *iter = filp->private_data;
5400 	struct splice_pipe_desc spd = {
5401 		.pages		= pages_def,
5402 		.partial	= partial_def,
5403 		.nr_pages	= 0, /* This gets updated below. */
5404 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5405 		.flags		= flags,
5406 		.ops		= &tracing_pipe_buf_ops,
5407 		.spd_release	= tracing_spd_release_pipe,
5408 	};
5409 	ssize_t ret;
5410 	size_t rem;
5411 	unsigned int i;
5412 
5413 	if (splice_grow_spd(pipe, &spd))
5414 		return -ENOMEM;
5415 
5416 	mutex_lock(&iter->mutex);
5417 
5418 	if (iter->trace->splice_read) {
5419 		ret = iter->trace->splice_read(iter, filp,
5420 					       ppos, pipe, len, flags);
5421 		if (ret)
5422 			goto out_err;
5423 	}
5424 
5425 	ret = tracing_wait_pipe(filp);
5426 	if (ret <= 0)
5427 		goto out_err;
5428 
5429 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5430 		ret = -EFAULT;
5431 		goto out_err;
5432 	}
5433 
5434 	trace_event_read_lock();
5435 	trace_access_lock(iter->cpu_file);
5436 
5437 	/* Fill as many pages as possible. */
5438 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5439 		spd.pages[i] = alloc_page(GFP_KERNEL);
5440 		if (!spd.pages[i])
5441 			break;
5442 
5443 		rem = tracing_fill_pipe_page(rem, iter);
5444 
5445 		/* Copy the data into the page, so we can start over. */
5446 		ret = trace_seq_to_buffer(&iter->seq,
5447 					  page_address(spd.pages[i]),
5448 					  trace_seq_used(&iter->seq));
5449 		if (ret < 0) {
5450 			__free_page(spd.pages[i]);
5451 			break;
5452 		}
5453 		spd.partial[i].offset = 0;
5454 		spd.partial[i].len = trace_seq_used(&iter->seq);
5455 
5456 		trace_seq_init(&iter->seq);
5457 	}
5458 
5459 	trace_access_unlock(iter->cpu_file);
5460 	trace_event_read_unlock();
5461 	mutex_unlock(&iter->mutex);
5462 
5463 	spd.nr_pages = i;
5464 
5465 	if (i)
5466 		ret = splice_to_pipe(pipe, &spd);
5467 	else
5468 		ret = 0;
5469 out:
5470 	splice_shrink_spd(&spd);
5471 	return ret;
5472 
5473 out_err:
5474 	mutex_unlock(&iter->mutex);
5475 	goto out;
5476 }
5477 
5478 static ssize_t
5479 tracing_entries_read(struct file *filp, char __user *ubuf,
5480 		     size_t cnt, loff_t *ppos)
5481 {
5482 	struct inode *inode = file_inode(filp);
5483 	struct trace_array *tr = inode->i_private;
5484 	int cpu = tracing_get_cpu(inode);
5485 	char buf[64];
5486 	int r = 0;
5487 	ssize_t ret;
5488 
5489 	mutex_lock(&trace_types_lock);
5490 
5491 	if (cpu == RING_BUFFER_ALL_CPUS) {
5492 		int cpu, buf_size_same;
5493 		unsigned long size;
5494 
5495 		size = 0;
5496 		buf_size_same = 1;
5497 		/* Check whether all CPUs have the same buffer size */
5498 		for_each_tracing_cpu(cpu) {
5499 			/* fill in the size from first enabled cpu */
5500 			if (size == 0)
5501 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5502 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5503 				buf_size_same = 0;
5504 				break;
5505 			}
5506 		}
5507 
5508 		if (buf_size_same) {
5509 			if (!ring_buffer_expanded)
5510 				r = sprintf(buf, "%lu (expanded: %lu)\n",
5511 					    size >> 10,
5512 					    trace_buf_size >> 10);
5513 			else
5514 				r = sprintf(buf, "%lu\n", size >> 10);
5515 		} else
5516 			r = sprintf(buf, "X\n");
5517 	} else
5518 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5519 
5520 	mutex_unlock(&trace_types_lock);
5521 
5522 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5523 	return ret;
5524 }
5525 
5526 static ssize_t
5527 tracing_entries_write(struct file *filp, const char __user *ubuf,
5528 		      size_t cnt, loff_t *ppos)
5529 {
5530 	struct inode *inode = file_inode(filp);
5531 	struct trace_array *tr = inode->i_private;
5532 	unsigned long val;
5533 	int ret;
5534 
5535 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5536 	if (ret)
5537 		return ret;
5538 
5539 	/* must have at least 1 entry */
5540 	if (!val)
5541 		return -EINVAL;
5542 
5543 	/* value is in KB */
5544 	val <<= 10;
5545 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5546 	if (ret < 0)
5547 		return ret;
5548 
5549 	*ppos += cnt;
5550 
5551 	return cnt;
5552 }
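
/*
 * Illustrative use of the write handler above (a sketch, assuming
 * tracefs at /sys/kernel/tracing): the value written is interpreted
 * in kilobytes and resizes every CPU's buffer when written to the
 * top-level buffer_size_kb file, or a single CPU's buffer when
 * written to per_cpu/cpuN/buffer_size_kb.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		// request 4096 KiB of ring buffer per CPU
 *		int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		if (write(fd, "4096", 4) != 4)
 *			return 1;
 *		close(fd);
 *		return 0;
 *	}
 */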
5553 
5554 static ssize_t
5555 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5556 				size_t cnt, loff_t *ppos)
5557 {
5558 	struct trace_array *tr = filp->private_data;
5559 	char buf[64];
5560 	int r, cpu;
5561 	unsigned long size = 0, expanded_size = 0;
5562 
5563 	mutex_lock(&trace_types_lock);
5564 	for_each_tracing_cpu(cpu) {
5565 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5566 		if (!ring_buffer_expanded)
5567 			expanded_size += trace_buf_size >> 10;
5568 	}
5569 	if (ring_buffer_expanded)
5570 		r = sprintf(buf, "%lu\n", size);
5571 	else
5572 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5573 	mutex_unlock(&trace_types_lock);
5574 
5575 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5576 }
5577 
5578 static ssize_t
5579 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5580 			  size_t cnt, loff_t *ppos)
5581 {
5582 	/*
5583 	 * There is no need to read what the user has written; this function
5584 	 * only exists so that using "echo" on the file does not return an error.
5585 	 */
5586 
5587 	*ppos += cnt;
5588 
5589 	return cnt;
5590 }
5591 
5592 static int
5593 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5594 {
5595 	struct trace_array *tr = inode->i_private;
5596 
5597 	/* disable tracing ? */
5598 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5599 		tracer_tracing_off(tr);
5600 	/* resize the ring buffer to 0 */
5601 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5602 
5603 	trace_array_put(tr);
5604 
5605 	return 0;
5606 }
5607 
5608 static ssize_t
5609 tracing_mark_write(struct file *filp, const char __user *ubuf,
5610 					size_t cnt, loff_t *fpos)
5611 {
5612 	unsigned long addr = (unsigned long)ubuf;
5613 	struct trace_array *tr = filp->private_data;
5614 	struct ring_buffer_event *event;
5615 	struct ring_buffer *buffer;
5616 	struct print_entry *entry;
5617 	unsigned long irq_flags;
5618 	struct page *pages[2];
5619 	void *map_page[2];
5620 	int nr_pages = 1;
5621 	ssize_t written;
5622 	int offset;
5623 	int size;
5624 	int len;
5625 	int ret;
5626 	int i;
5627 
5628 	if (tracing_disabled)
5629 		return -EINVAL;
5630 
5631 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5632 		return -EINVAL;
5633 
5634 	if (cnt > TRACE_BUF_SIZE)
5635 		cnt = TRACE_BUF_SIZE;
5636 
5637 	/*
5638 	 * Userspace is injecting traces into the kernel trace buffer.
5639 	 * We want to be as non-intrusive as possible: we do not want to
5640 	 * allocate any special buffers or take any locks, but instead
5641 	 * write the userspace data straight into the ring buffer.
5642 	 *
5643 	 * First we need to pin the userspace buffer into memory. It is
5644 	 * most likely already resident, because the caller just
5645 	 * referenced it, but there is no guarantee of that. By using
5646 	 * get_user_pages_fast() and kmap_atomic()/kunmap_atomic() we get
5647 	 * access to the pages directly, and then write the data straight
5648 	 * into the ring buffer. (A userspace sketch of the write side
5649 	 * follows this function.)
5650 	 */
5651 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5652 
5653 	/* check if we cross pages */
5654 	if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5655 		nr_pages = 2;
5656 
5657 	offset = addr & (PAGE_SIZE - 1);
5658 	addr &= PAGE_MASK;
5659 
5660 	ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5661 	if (ret < nr_pages) {
5662 		while (--ret >= 0)
5663 			put_page(pages[ret]);
5664 		written = -EFAULT;
5665 		goto out;
5666 	}
5667 
5668 	for (i = 0; i < nr_pages; i++)
5669 		map_page[i] = kmap_atomic(pages[i]);
5670 
5671 	local_save_flags(irq_flags);
5672 	size = sizeof(*entry) + cnt + 2; /* possible \n added */
5673 	buffer = tr->trace_buffer.buffer;
5674 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5675 					  irq_flags, preempt_count());
5676 	if (!event) {
5677 		/* Ring buffer disabled, return as if not open for write */
5678 		written = -EBADF;
5679 		goto out_unlock;
5680 	}
5681 
5682 	entry = ring_buffer_event_data(event);
5683 	entry->ip = _THIS_IP_;
5684 
5685 	if (nr_pages == 2) {
5686 		len = PAGE_SIZE - offset;
5687 		memcpy(&entry->buf, map_page[0] + offset, len);
5688 		memcpy(&entry->buf[len], map_page[1], cnt - len);
5689 	} else
5690 		memcpy(&entry->buf, map_page[0] + offset, cnt);
5691 
5692 	if (entry->buf[cnt - 1] != '\n') {
5693 		entry->buf[cnt] = '\n';
5694 		entry->buf[cnt + 1] = '\0';
5695 	} else
5696 		entry->buf[cnt] = '\0';
5697 
5698 	__buffer_unlock_commit(buffer, event);
5699 
5700 	written = cnt;
5701 
5702 	*fpos += written;
5703 
5704  out_unlock:
5705 	for (i = nr_pages - 1; i >= 0; i--) {
5706 		kunmap_atomic(map_page[i]);
5707 		put_page(pages[i]);
5708 	}
5709  out:
5710 	return written;
5711 }
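
/*
 * A minimal sketch of the userspace side of this handler (assuming
 * tracefs at /sys/kernel/tracing, where it is exposed as
 * trace_marker): each write() becomes a single TRACE_PRINT event,
 * truncated to TRACE_BUF_SIZE and newline-terminated if the payload
 * does not already end in one.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		const char *msg = "hello from userspace\n";
 *		int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		// the buffer is copied straight into the ring buffer
 *		write(fd, msg, strlen(msg));
 *		close(fd);
 *		return 0;
 *	}
 */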
5712 
5713 static int tracing_clock_show(struct seq_file *m, void *v)
5714 {
5715 	struct trace_array *tr = m->private;
5716 	int i;
5717 
5718 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5719 		seq_printf(m,
5720 			"%s%s%s%s", i ? " " : "",
5721 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5722 			i == tr->clock_id ? "]" : "");
5723 	seq_putc(m, '\n');
5724 
5725 	return 0;
5726 }
5727 
5728 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5729 {
5730 	int i;
5731 
5732 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5733 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
5734 			break;
5735 	}
5736 	if (i == ARRAY_SIZE(trace_clocks))
5737 		return -EINVAL;
5738 
5739 	mutex_lock(&trace_types_lock);
5740 
5741 	tr->clock_id = i;
5742 
5743 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5744 
5745 	/*
5746 	 * New clock may not be consistent with the previous clock.
5747 	 * Reset the buffer so that it doesn't have incomparable timestamps.
5748 	 */
5749 	tracing_reset_online_cpus(&tr->trace_buffer);
5750 
5751 #ifdef CONFIG_TRACER_MAX_TRACE
5752 	if (tr->max_buffer.buffer)
5753 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5754 	tracing_reset_online_cpus(&tr->max_buffer);
5755 #endif
5756 
5757 	mutex_unlock(&trace_types_lock);
5758 
5759 	return 0;
5760 }
5761 
5762 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5763 				   size_t cnt, loff_t *fpos)
5764 {
5765 	struct seq_file *m = filp->private_data;
5766 	struct trace_array *tr = m->private;
5767 	char buf[64];
5768 	const char *clockstr;
5769 	int ret;
5770 
5771 	if (cnt >= sizeof(buf))
5772 		return -EINVAL;
5773 
5774 	if (copy_from_user(buf, ubuf, cnt))
5775 		return -EFAULT;
5776 
5777 	buf[cnt] = 0;
5778 
5779 	clockstr = strstrip(buf);
5780 
5781 	ret = tracing_set_clock(tr, clockstr);
5782 	if (ret)
5783 		return ret;
5784 
5785 	*fpos += cnt;
5786 
5787 	return cnt;
5788 }
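
/*
 * Illustrative usage of the clock file served by the handlers above
 * (a sketch, assuming tracefs at /sys/kernel/tracing): reading lists
 * the available clocks with the current one in brackets, e.g.
 * "[local] global counter ...", and writing one of the names selects
 * it, resetting the buffers as noted in tracing_set_clock().
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[256];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/trace_clock", O_RDWR);
 *
 *		if (fd < 0)
 *			return 1;
 *		n = read(fd, buf, sizeof(buf) - 1);
 *		if (n > 0) {
 *			buf[n] = '\0';
 *			fputs(buf, stdout);	// e.g. "[local] global ..."
 *		}
 *		write(fd, "global", 6);		// switch to the global clock
 *		close(fd);
 *		return 0;
 *	}
 */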
5789 
5790 static int tracing_clock_open(struct inode *inode, struct file *file)
5791 {
5792 	struct trace_array *tr = inode->i_private;
5793 	int ret;
5794 
5795 	if (tracing_disabled)
5796 		return -ENODEV;
5797 
5798 	if (trace_array_get(tr))
5799 		return -ENODEV;
5800 
5801 	ret = single_open(file, tracing_clock_show, inode->i_private);
5802 	if (ret < 0)
5803 		trace_array_put(tr);
5804 
5805 	return ret;
5806 }
5807 
5808 struct ftrace_buffer_info {
5809 	struct trace_iterator	iter;
5810 	void			*spare;
5811 	unsigned int		read;
5812 };
5813 
5814 #ifdef CONFIG_TRACER_SNAPSHOT
5815 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5816 {
5817 	struct trace_array *tr = inode->i_private;
5818 	struct trace_iterator *iter;
5819 	struct seq_file *m;
5820 	int ret = 0;
5821 
5822 	if (trace_array_get(tr) < 0)
5823 		return -ENODEV;
5824 
5825 	if (file->f_mode & FMODE_READ) {
5826 		iter = __tracing_open(inode, file, true);
5827 		if (IS_ERR(iter))
5828 			ret = PTR_ERR(iter);
5829 	} else {
5830 		/* Writes still need the seq_file to hold the private data */
5831 		ret = -ENOMEM;
5832 		m = kzalloc(sizeof(*m), GFP_KERNEL);
5833 		if (!m)
5834 			goto out;
5835 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5836 		if (!iter) {
5837 			kfree(m);
5838 			goto out;
5839 		}
5840 		ret = 0;
5841 
5842 		iter->tr = tr;
5843 		iter->trace_buffer = &tr->max_buffer;
5844 		iter->cpu_file = tracing_get_cpu(inode);
5845 		m->private = iter;
5846 		file->private_data = m;
5847 	}
5848 out:
5849 	if (ret < 0)
5850 		trace_array_put(tr);
5851 
5852 	return ret;
5853 }
5854 
5855 static ssize_t
5856 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5857 		       loff_t *ppos)
5858 {
5859 	struct seq_file *m = filp->private_data;
5860 	struct trace_iterator *iter = m->private;
5861 	struct trace_array *tr = iter->tr;
5862 	unsigned long val;
5863 	int ret;
5864 
5865 	ret = tracing_update_buffers();
5866 	if (ret < 0)
5867 		return ret;
5868 
5869 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5870 	if (ret)
5871 		return ret;
5872 
5873 	mutex_lock(&trace_types_lock);
5874 
5875 	if (tr->current_trace->use_max_tr) {
5876 		ret = -EBUSY;
5877 		goto out;
5878 	}
5879 
5880 	switch (val) {
5881 	case 0:
5882 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5883 			ret = -EINVAL;
5884 			break;
5885 		}
5886 		if (tr->allocated_snapshot)
5887 			free_snapshot(tr);
5888 		break;
5889 	case 1:
5890 /* Only allow per-cpu swap if the ring buffer supports it */
5891 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5892 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5893 			ret = -EINVAL;
5894 			break;
5895 		}
5896 #endif
5897 		if (!tr->allocated_snapshot) {
5898 			ret = alloc_snapshot(tr);
5899 			if (ret < 0)
5900 				break;
5901 		}
5902 		local_irq_disable();
5903 		/* Now, we're going to swap */
5904 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5905 			update_max_tr(tr, current, smp_processor_id());
5906 		else
5907 			update_max_tr_single(tr, current, iter->cpu_file);
5908 		local_irq_enable();
5909 		break;
5910 	default:
5911 		if (tr->allocated_snapshot) {
5912 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5913 				tracing_reset_online_cpus(&tr->max_buffer);
5914 			else
5915 				tracing_reset(&tr->max_buffer, iter->cpu_file);
5916 		}
5917 		break;
5918 	}
5919 
5920 	if (ret >= 0) {
5921 		*ppos += cnt;
5922 		ret = cnt;
5923 	}
5924 out:
5925 	mutex_unlock(&trace_types_lock);
5926 	return ret;
5927 }
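
/*
 * Illustrative use of the control values handled above (a sketch,
 * assuming tracefs at /sys/kernel/tracing): writing "1" allocates the
 * max buffer if necessary and swaps it with the live buffer, "0"
 * frees the snapshot again, and any other number simply clears the
 * snapshot buffer.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, "1", 1);	// take a snapshot now
 *		// ... read the snapshot file to inspect it, then ...
 *		write(fd, "0", 1);	// discard it
 *		close(fd);
 *		return 0;
 *	}
 */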
5928 
5929 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5930 {
5931 	struct seq_file *m = file->private_data;
5932 	int ret;
5933 
5934 	ret = tracing_release(inode, file);
5935 
5936 	if (file->f_mode & FMODE_READ)
5937 		return ret;
5938 
5939 	/* If write only, the seq_file is just a stub */
5940 	if (m)
5941 		kfree(m->private);
5942 	kfree(m);
5943 
5944 	return 0;
5945 }
5946 
5947 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5948 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5949 				    size_t count, loff_t *ppos);
5950 static int tracing_buffers_release(struct inode *inode, struct file *file);
5951 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5952 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5953 
5954 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5955 {
5956 	struct ftrace_buffer_info *info;
5957 	int ret;
5958 
5959 	ret = tracing_buffers_open(inode, filp);
5960 	if (ret < 0)
5961 		return ret;
5962 
5963 	info = filp->private_data;
5964 
5965 	if (info->iter.trace->use_max_tr) {
5966 		tracing_buffers_release(inode, filp);
5967 		return -EBUSY;
5968 	}
5969 
5970 	info->iter.snapshot = true;
5971 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
5972 
5973 	return ret;
5974 }
5975 
5976 #endif /* CONFIG_TRACER_SNAPSHOT */
5977 
5978 
5979 static const struct file_operations tracing_thresh_fops = {
5980 	.open		= tracing_open_generic,
5981 	.read		= tracing_thresh_read,
5982 	.write		= tracing_thresh_write,
5983 	.llseek		= generic_file_llseek,
5984 };
5985 
5986 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5987 static const struct file_operations tracing_max_lat_fops = {
5988 	.open		= tracing_open_generic,
5989 	.read		= tracing_max_lat_read,
5990 	.write		= tracing_max_lat_write,
5991 	.llseek		= generic_file_llseek,
5992 };
5993 #endif
5994 
5995 static const struct file_operations set_tracer_fops = {
5996 	.open		= tracing_open_generic,
5997 	.read		= tracing_set_trace_read,
5998 	.write		= tracing_set_trace_write,
5999 	.llseek		= generic_file_llseek,
6000 };
6001 
6002 static const struct file_operations tracing_pipe_fops = {
6003 	.open		= tracing_open_pipe,
6004 	.poll		= tracing_poll_pipe,
6005 	.read		= tracing_read_pipe,
6006 	.splice_read	= tracing_splice_read_pipe,
6007 	.release	= tracing_release_pipe,
6008 	.llseek		= no_llseek,
6009 };
6010 
6011 static const struct file_operations tracing_entries_fops = {
6012 	.open		= tracing_open_generic_tr,
6013 	.read		= tracing_entries_read,
6014 	.write		= tracing_entries_write,
6015 	.llseek		= generic_file_llseek,
6016 	.release	= tracing_release_generic_tr,
6017 };
6018 
6019 static const struct file_operations tracing_total_entries_fops = {
6020 	.open		= tracing_open_generic_tr,
6021 	.read		= tracing_total_entries_read,
6022 	.llseek		= generic_file_llseek,
6023 	.release	= tracing_release_generic_tr,
6024 };
6025 
6026 static const struct file_operations tracing_free_buffer_fops = {
6027 	.open		= tracing_open_generic_tr,
6028 	.write		= tracing_free_buffer_write,
6029 	.release	= tracing_free_buffer_release,
6030 };
6031 
6032 static const struct file_operations tracing_mark_fops = {
6033 	.open		= tracing_open_generic_tr,
6034 	.write		= tracing_mark_write,
6035 	.llseek		= generic_file_llseek,
6036 	.release	= tracing_release_generic_tr,
6037 };
6038 
6039 static const struct file_operations trace_clock_fops = {
6040 	.open		= tracing_clock_open,
6041 	.read		= seq_read,
6042 	.llseek		= seq_lseek,
6043 	.release	= tracing_single_release_tr,
6044 	.write		= tracing_clock_write,
6045 };
6046 
6047 #ifdef CONFIG_TRACER_SNAPSHOT
6048 static const struct file_operations snapshot_fops = {
6049 	.open		= tracing_snapshot_open,
6050 	.read		= seq_read,
6051 	.write		= tracing_snapshot_write,
6052 	.llseek		= tracing_lseek,
6053 	.release	= tracing_snapshot_release,
6054 };
6055 
6056 static const struct file_operations snapshot_raw_fops = {
6057 	.open		= snapshot_raw_open,
6058 	.read		= tracing_buffers_read,
6059 	.release	= tracing_buffers_release,
6060 	.splice_read	= tracing_buffers_splice_read,
6061 	.llseek		= no_llseek,
6062 };
6063 
6064 #endif /* CONFIG_TRACER_SNAPSHOT */
6065 
6066 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6067 {
6068 	struct trace_array *tr = inode->i_private;
6069 	struct ftrace_buffer_info *info;
6070 	int ret;
6071 
6072 	if (tracing_disabled)
6073 		return -ENODEV;
6074 
6075 	if (trace_array_get(tr) < 0)
6076 		return -ENODEV;
6077 
6078 	info = kzalloc(sizeof(*info), GFP_KERNEL);
6079 	if (!info) {
6080 		trace_array_put(tr);
6081 		return -ENOMEM;
6082 	}
6083 
6084 	mutex_lock(&trace_types_lock);
6085 
6086 	info->iter.tr		= tr;
6087 	info->iter.cpu_file	= tracing_get_cpu(inode);
6088 	info->iter.trace	= tr->current_trace;
6089 	info->iter.trace_buffer = &tr->trace_buffer;
6090 	info->spare		= NULL;
6091 	/* Force reading ring buffer for first read */
6092 	info->read		= (unsigned int)-1;
6093 
6094 	filp->private_data = info;
6095 
6096 	tr->current_trace->ref++;
6097 
6098 	mutex_unlock(&trace_types_lock);
6099 
6100 	ret = nonseekable_open(inode, filp);
6101 	if (ret < 0)
6102 		trace_array_put(tr);
6103 
6104 	return ret;
6105 }
6106 
6107 static unsigned int
6108 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6109 {
6110 	struct ftrace_buffer_info *info = filp->private_data;
6111 	struct trace_iterator *iter = &info->iter;
6112 
6113 	return trace_poll(iter, filp, poll_table);
6114 }
6115 
6116 static ssize_t
6117 tracing_buffers_read(struct file *filp, char __user *ubuf,
6118 		     size_t count, loff_t *ppos)
6119 {
6120 	struct ftrace_buffer_info *info = filp->private_data;
6121 	struct trace_iterator *iter = &info->iter;
6122 	ssize_t ret;
6123 	ssize_t size;
6124 
6125 	if (!count)
6126 		return 0;
6127 
6128 #ifdef CONFIG_TRACER_MAX_TRACE
6129 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6130 		return -EBUSY;
6131 #endif
6132 
6133 	if (!info->spare)
6134 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6135 							  iter->cpu_file);
6136 	if (!info->spare)
6137 		return -ENOMEM;
6138 
6139 	/* Do we have previous read data to read? */
6140 	if (info->read < PAGE_SIZE)
6141 		goto read;
6142 
6143  again:
6144 	trace_access_lock(iter->cpu_file);
6145 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6146 				    &info->spare,
6147 				    count,
6148 				    iter->cpu_file, 0);
6149 	trace_access_unlock(iter->cpu_file);
6150 
6151 	if (ret < 0) {
6152 		if (trace_empty(iter)) {
6153 			if ((filp->f_flags & O_NONBLOCK))
6154 				return -EAGAIN;
6155 
6156 			ret = wait_on_pipe(iter, false);
6157 			if (ret)
6158 				return ret;
6159 
6160 			goto again;
6161 		}
6162 		return 0;
6163 	}
6164 
6165 	info->read = 0;
6166  read:
6167 	size = PAGE_SIZE - info->read;
6168 	if (size > count)
6169 		size = count;
6170 
6171 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6172 	if (ret == size)
6173 		return -EFAULT;
6174 
6175 	size -= ret;
6176 
6177 	*ppos += size;
6178 	info->read += size;
6179 
6180 	return size;
6181 }
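
/*
 * A sketch of consuming the raw, per-CPU pages this handler serves
 * (illustration only; assumes tracefs at /sys/kernel/tracing and a
 * 4096-byte page size). Unlike trace_pipe, the data comes back as
 * binary ring-buffer pages rather than formatted text.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char page[4096];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *			      O_RDONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		while ((n = read(fd, page, sizeof(page))) > 0)
 *			;	// hand the raw page to a decoder here
 *		close(fd);
 *		return 0;
 *	}
 */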
6182 
6183 static int tracing_buffers_release(struct inode *inode, struct file *file)
6184 {
6185 	struct ftrace_buffer_info *info = file->private_data;
6186 	struct trace_iterator *iter = &info->iter;
6187 
6188 	mutex_lock(&trace_types_lock);
6189 
6190 	iter->tr->current_trace->ref--;
6191 
6192 	__trace_array_put(iter->tr);
6193 
6194 	if (info->spare)
6195 		ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6196 	kfree(info);
6197 
6198 	mutex_unlock(&trace_types_lock);
6199 
6200 	return 0;
6201 }
6202 
6203 struct buffer_ref {
6204 	struct ring_buffer	*buffer;
6205 	void			*page;
6206 	int			ref;
6207 };
6208 
6209 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6210 				    struct pipe_buffer *buf)
6211 {
6212 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6213 
6214 	if (--ref->ref)
6215 		return;
6216 
6217 	ring_buffer_free_read_page(ref->buffer, ref->page);
6218 	kfree(ref);
6219 	buf->private = 0;
6220 }
6221 
6222 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6223 				struct pipe_buffer *buf)
6224 {
6225 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6226 
6227 	ref->ref++;
6228 }
6229 
6230 /* Pipe buffer operations for a buffer. */
6231 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6232 	.can_merge		= 0,
6233 	.confirm		= generic_pipe_buf_confirm,
6234 	.release		= buffer_pipe_buf_release,
6235 	.steal			= generic_pipe_buf_steal,
6236 	.get			= buffer_pipe_buf_get,
6237 };
6238 
6239 /*
6240  * Callback from splice_to_pipe(): releases pages left at the end of
6241  * the spd in case we errored out while filling the pipe.
6242  */
6243 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6244 {
6245 	struct buffer_ref *ref =
6246 		(struct buffer_ref *)spd->partial[i].private;
6247 
6248 	if (--ref->ref)
6249 		return;
6250 
6251 	ring_buffer_free_read_page(ref->buffer, ref->page);
6252 	kfree(ref);
6253 	spd->partial[i].private = 0;
6254 }
6255 
6256 static ssize_t
6257 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6258 			    struct pipe_inode_info *pipe, size_t len,
6259 			    unsigned int flags)
6260 {
6261 	struct ftrace_buffer_info *info = file->private_data;
6262 	struct trace_iterator *iter = &info->iter;
6263 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6264 	struct page *pages_def[PIPE_DEF_BUFFERS];
6265 	struct splice_pipe_desc spd = {
6266 		.pages		= pages_def,
6267 		.partial	= partial_def,
6268 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6269 		.flags		= flags,
6270 		.ops		= &buffer_pipe_buf_ops,
6271 		.spd_release	= buffer_spd_release,
6272 	};
6273 	struct buffer_ref *ref;
6274 	int entries, i;
6275 	ssize_t ret = 0;
6276 
6277 #ifdef CONFIG_TRACER_MAX_TRACE
6278 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6279 		return -EBUSY;
6280 #endif
6281 
6282 	if (*ppos & (PAGE_SIZE - 1))
6283 		return -EINVAL;
6284 
6285 	if (len & (PAGE_SIZE - 1)) {
6286 		if (len < PAGE_SIZE)
6287 			return -EINVAL;
6288 		len &= PAGE_MASK;
6289 	}
6290 
6291 	if (splice_grow_spd(pipe, &spd))
6292 		return -ENOMEM;
6293 
6294  again:
6295 	trace_access_lock(iter->cpu_file);
6296 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6297 
6298 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6299 		struct page *page;
6300 		int r;
6301 
6302 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6303 		if (!ref) {
6304 			ret = -ENOMEM;
6305 			break;
6306 		}
6307 
6308 		ref->ref = 1;
6309 		ref->buffer = iter->trace_buffer->buffer;
6310 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6311 		if (!ref->page) {
6312 			ret = -ENOMEM;
6313 			kfree(ref);
6314 			break;
6315 		}
6316 
6317 		r = ring_buffer_read_page(ref->buffer, &ref->page,
6318 					  len, iter->cpu_file, 1);
6319 		if (r < 0) {
6320 			ring_buffer_free_read_page(ref->buffer, ref->page);
6321 			kfree(ref);
6322 			break;
6323 		}
6324 
6325 		page = virt_to_page(ref->page);
6326 
6327 		spd.pages[i] = page;
6328 		spd.partial[i].len = PAGE_SIZE;
6329 		spd.partial[i].offset = 0;
6330 		spd.partial[i].private = (unsigned long)ref;
6331 		spd.nr_pages++;
6332 		*ppos += PAGE_SIZE;
6333 
6334 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6335 	}
6336 
6337 	trace_access_unlock(iter->cpu_file);
6338 	spd.nr_pages = i;
6339 
6340 	/* did we read anything? */
6341 	if (!spd.nr_pages) {
6342 		if (ret)
6343 			goto out;
6344 
6345 		ret = -EAGAIN;
6346 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6347 			goto out;
6348 
6349 		ret = wait_on_pipe(iter, true);
6350 		if (ret)
6351 			goto out;
6352 
6353 		goto again;
6354 	}
6355 
6356 	ret = splice_to_pipe(pipe, &spd);
6357 out:
6358 	splice_shrink_spd(&spd);
6359 
6360 	return ret;
6361 }
6362 
6363 static const struct file_operations tracing_buffers_fops = {
6364 	.open		= tracing_buffers_open,
6365 	.read		= tracing_buffers_read,
6366 	.poll		= tracing_buffers_poll,
6367 	.release	= tracing_buffers_release,
6368 	.splice_read	= tracing_buffers_splice_read,
6369 	.llseek		= no_llseek,
6370 };
6371 
6372 static ssize_t
6373 tracing_stats_read(struct file *filp, char __user *ubuf,
6374 		   size_t count, loff_t *ppos)
6375 {
6376 	struct inode *inode = file_inode(filp);
6377 	struct trace_array *tr = inode->i_private;
6378 	struct trace_buffer *trace_buf = &tr->trace_buffer;
6379 	int cpu = tracing_get_cpu(inode);
6380 	struct trace_seq *s;
6381 	unsigned long cnt;
6382 	unsigned long long t;
6383 	unsigned long usec_rem;
6384 
6385 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6386 	if (!s)
6387 		return -ENOMEM;
6388 
6389 	trace_seq_init(s);
6390 
6391 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6392 	trace_seq_printf(s, "entries: %ld\n", cnt);
6393 
6394 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6395 	trace_seq_printf(s, "overrun: %ld\n", cnt);
6396 
6397 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6398 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6399 
6400 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6401 	trace_seq_printf(s, "bytes: %ld\n", cnt);
6402 
6403 	if (trace_clocks[tr->clock_id].in_ns) {
6404 		/* local or global for trace_clock */
6405 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6406 		usec_rem = do_div(t, USEC_PER_SEC);
6407 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6408 								t, usec_rem);
6409 
6410 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6411 		usec_rem = do_div(t, USEC_PER_SEC);
6412 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6413 	} else {
6414 		/* counter or tsc mode for trace_clock */
6415 		trace_seq_printf(s, "oldest event ts: %llu\n",
6416 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6417 
6418 		trace_seq_printf(s, "now ts: %llu\n",
6419 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
6420 	}
6421 
6422 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6423 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
6424 
6425 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6426 	trace_seq_printf(s, "read events: %ld\n", cnt);
6427 
6428 	count = simple_read_from_buffer(ubuf, count, ppos,
6429 					s->buffer, trace_seq_used(s));
6430 
6431 	kfree(s);
6432 
6433 	return count;
6434 }
6435 
6436 static const struct file_operations tracing_stats_fops = {
6437 	.open		= tracing_open_generic_tr,
6438 	.read		= tracing_stats_read,
6439 	.llseek		= generic_file_llseek,
6440 	.release	= tracing_release_generic_tr,
6441 };
6442 
6443 #ifdef CONFIG_DYNAMIC_FTRACE
6444 
6445 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6446 {
6447 	return 0;
6448 }
6449 
6450 static ssize_t
6451 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6452 		  size_t cnt, loff_t *ppos)
6453 {
6454 	static char ftrace_dyn_info_buffer[1024];
6455 	static DEFINE_MUTEX(dyn_info_mutex);
6456 	unsigned long *p = filp->private_data;
6457 	char *buf = ftrace_dyn_info_buffer;
6458 	int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6459 	int r;
6460 
6461 	mutex_lock(&dyn_info_mutex);
6462 	r = sprintf(buf, "%ld ", *p);
6463 
6464 	r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6465 	buf[r++] = '\n';
6466 
6467 	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6468 
6469 	mutex_unlock(&dyn_info_mutex);
6470 
6471 	return r;
6472 }
6473 
6474 static const struct file_operations tracing_dyn_info_fops = {
6475 	.open		= tracing_open_generic,
6476 	.read		= tracing_read_dyn_info,
6477 	.llseek		= generic_file_llseek,
6478 };
6479 #endif /* CONFIG_DYNAMIC_FTRACE */
6480 
6481 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6482 static void
6483 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6484 {
6485 	tracing_snapshot();
6486 }
6487 
6488 static void
6489 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6490 {
6491 	unsigned long *count = (long *)data;
6492 
6493 	if (!*count)
6494 		return;
6495 
6496 	if (*count != -1)
6497 		(*count)--;
6498 
6499 	tracing_snapshot();
6500 }
6501 
6502 static int
6503 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6504 		      struct ftrace_probe_ops *ops, void *data)
6505 {
6506 	long count = (long)data;
6507 
6508 	seq_printf(m, "%ps:", (void *)ip);
6509 
6510 	seq_puts(m, "snapshot");
6511 
6512 	if (count == -1)
6513 		seq_puts(m, ":unlimited\n");
6514 	else
6515 		seq_printf(m, ":count=%ld\n", count);
6516 
6517 	return 0;
6518 }
6519 
6520 static struct ftrace_probe_ops snapshot_probe_ops = {
6521 	.func			= ftrace_snapshot,
6522 	.print			= ftrace_snapshot_print,
6523 };
6524 
6525 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6526 	.func			= ftrace_count_snapshot,
6527 	.print			= ftrace_snapshot_print,
6528 };
6529 
6530 static int
6531 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6532 			       char *glob, char *cmd, char *param, int enable)
6533 {
6534 	struct ftrace_probe_ops *ops;
6535 	void *count = (void *)-1;
6536 	char *number;
6537 	int ret;
6538 
6539 	/* hash funcs only work with set_ftrace_filter */
6540 	if (!enable)
6541 		return -EINVAL;
6542 
6543 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
6544 
6545 	if (glob[0] == '!') {
6546 		unregister_ftrace_function_probe_func(glob+1, ops);
6547 		return 0;
6548 	}
6549 
6550 	if (!param)
6551 		goto out_reg;
6552 
6553 	number = strsep(&param, ":");
6554 
6555 	if (!strlen(number))
6556 		goto out_reg;
6557 
6558 	/*
6559 	 * We use the callback data field (which is a pointer)
6560 	 * as our counter.
6561 	 */
6562 	ret = kstrtoul(number, 0, (unsigned long *)&count);
6563 	if (ret)
6564 		return ret;
6565 
6566  out_reg:
6567 	ret = alloc_snapshot(&global_trace);
6568 	if (ret < 0)
6569 		goto out;
6570 
6571 	ret = register_ftrace_function_probe(glob, ops, count);
6572 
6573  out:
6574 	return ret < 0 ? ret : 0;
6575 }
6576 
6577 static struct ftrace_func_command ftrace_snapshot_cmd = {
6578 	.name			= "snapshot",
6579 	.func			= ftrace_trace_snapshot_callback,
6580 };
6581 
6582 static __init int register_snapshot_cmd(void)
6583 {
6584 	return register_ftrace_command(&ftrace_snapshot_cmd);
6585 }
6586 #else
6587 static inline __init int register_snapshot_cmd(void) { return 0; }
6588 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6589 
6590 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6591 {
6592 	if (WARN_ON(!tr->dir))
6593 		return ERR_PTR(-ENODEV);
6594 
6595 	/* Top directory uses NULL as the parent */
6596 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6597 		return NULL;
6598 
6599 	/* All sub buffers have a descriptor */
6600 	return tr->dir;
6601 }
6602 
6603 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6604 {
6605 	struct dentry *d_tracer;
6606 
6607 	if (tr->percpu_dir)
6608 		return tr->percpu_dir;
6609 
6610 	d_tracer = tracing_get_dentry(tr);
6611 	if (IS_ERR(d_tracer))
6612 		return NULL;
6613 
6614 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6615 
6616 	WARN_ONCE(!tr->percpu_dir,
6617 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6618 
6619 	return tr->percpu_dir;
6620 }
6621 
6622 static struct dentry *
6623 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6624 		      void *data, long cpu, const struct file_operations *fops)
6625 {
6626 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6627 
6628 	if (ret) /* See tracing_get_cpu() */
6629 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
6630 	return ret;
6631 }
6632 
6633 static void
6634 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6635 {
6636 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6637 	struct dentry *d_cpu;
6638 	char cpu_dir[30]; /* 30 characters should be more than enough */
6639 
6640 	if (!d_percpu)
6641 		return;
6642 
6643 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
6644 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6645 	if (!d_cpu) {
6646 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6647 		return;
6648 	}
6649 
6650 	/* per cpu trace_pipe */
6651 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6652 				tr, cpu, &tracing_pipe_fops);
6653 
6654 	/* per cpu trace */
6655 	trace_create_cpu_file("trace", 0644, d_cpu,
6656 				tr, cpu, &tracing_fops);
6657 
6658 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6659 				tr, cpu, &tracing_buffers_fops);
6660 
6661 	trace_create_cpu_file("stats", 0444, d_cpu,
6662 				tr, cpu, &tracing_stats_fops);
6663 
6664 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6665 				tr, cpu, &tracing_entries_fops);
6666 
6667 #ifdef CONFIG_TRACER_SNAPSHOT
6668 	trace_create_cpu_file("snapshot", 0644, d_cpu,
6669 				tr, cpu, &snapshot_fops);
6670 
6671 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6672 				tr, cpu, &snapshot_raw_fops);
6673 #endif
6674 }
6675 
6676 #ifdef CONFIG_FTRACE_SELFTEST
6677 /* Let selftest have access to static functions in this file */
6678 #include "trace_selftest.c"
6679 #endif
6680 
6681 static ssize_t
6682 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6683 			loff_t *ppos)
6684 {
6685 	struct trace_option_dentry *topt = filp->private_data;
6686 	char *buf;
6687 
6688 	if (topt->flags->val & topt->opt->bit)
6689 		buf = "1\n";
6690 	else
6691 		buf = "0\n";
6692 
6693 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6694 }
6695 
6696 static ssize_t
6697 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6698 			 loff_t *ppos)
6699 {
6700 	struct trace_option_dentry *topt = filp->private_data;
6701 	unsigned long val;
6702 	int ret;
6703 
6704 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6705 	if (ret)
6706 		return ret;
6707 
6708 	if (val != 0 && val != 1)
6709 		return -EINVAL;
6710 
6711 	if (!!(topt->flags->val & topt->opt->bit) != val) {
6712 		mutex_lock(&trace_types_lock);
6713 		ret = __set_tracer_option(topt->tr, topt->flags,
6714 					  topt->opt, !val);
6715 		mutex_unlock(&trace_types_lock);
6716 		if (ret)
6717 			return ret;
6718 	}
6719 
6720 	*ppos += cnt;
6721 
6722 	return cnt;
6723 }
6724 
6725 
6726 static const struct file_operations trace_options_fops = {
6727 	.open = tracing_open_generic,
6728 	.read = trace_options_read,
6729 	.write = trace_options_write,
6730 	.llseek	= generic_file_llseek,
6731 };
6732 
6733 /*
6734  * In order to pass in both the trace_array descriptor as well as the index
6735  * to the flag that the trace option file represents, the trace_array
6736  * has a character array of trace_flags_index[], which holds the index
6737  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6738  * The address of this character array is passed to the flag option file
6739  * read/write callbacks.
6740  *
6741  * In order to extract both the index and the trace_array descriptor,
6742  * get_tr_index() uses the following algorithm.
6743  *
6744  *   idx = *ptr;
6745  *
6746  * Because the pointer points into the index array and each slot
6747  * holds its own index (remember index[1] == 1), dereferencing it
6748  * yields the index.
6749  *
6750  * Subtracting that index from ptr then gives the start of the array:
6751  *
6752  *   ptr - idx == &index[0]
6753  *
6754  * A simple container_of() on that pointer then gets us back to the
6755  * trace_array descriptor (a worked example follows get_tr_index()).
6756  */
6757 static void get_tr_index(void *data, struct trace_array **ptr,
6758 			 unsigned int *pindex)
6759 {
6760 	*pindex = *(unsigned char *)data;
6761 
6762 	*ptr = container_of(data - *pindex, struct trace_array,
6763 			    trace_flags_index);
6764 }
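
/*
 * A worked example of the recovery described above (a sketch only,
 * not called anywhere): if a flag file was created with
 * data == &tr->trace_flags_index[3], then
 *
 *	unsigned char idx = *(unsigned char *)data;	// == 3
 *	void *base = data - idx;	// == &tr->trace_flags_index[0]
 *	struct trace_array *found =
 *		container_of(base, struct trace_array, trace_flags_index);
 *
 * so that found == tr, which is exactly what get_tr_index() computes.
 */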
6765 
6766 static ssize_t
6767 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6768 			loff_t *ppos)
6769 {
6770 	void *tr_index = filp->private_data;
6771 	struct trace_array *tr;
6772 	unsigned int index;
6773 	char *buf;
6774 
6775 	get_tr_index(tr_index, &tr, &index);
6776 
6777 	if (tr->trace_flags & (1 << index))
6778 		buf = "1\n";
6779 	else
6780 		buf = "0\n";
6781 
6782 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6783 }
6784 
6785 static ssize_t
6786 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6787 			 loff_t *ppos)
6788 {
6789 	void *tr_index = filp->private_data;
6790 	struct trace_array *tr;
6791 	unsigned int index;
6792 	unsigned long val;
6793 	int ret;
6794 
6795 	get_tr_index(tr_index, &tr, &index);
6796 
6797 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6798 	if (ret)
6799 		return ret;
6800 
6801 	if (val != 0 && val != 1)
6802 		return -EINVAL;
6803 
6804 	mutex_lock(&trace_types_lock);
6805 	ret = set_tracer_flag(tr, 1 << index, val);
6806 	mutex_unlock(&trace_types_lock);
6807 
6808 	if (ret < 0)
6809 		return ret;
6810 
6811 	*ppos += cnt;
6812 
6813 	return cnt;
6814 }
6815 
6816 static const struct file_operations trace_options_core_fops = {
6817 	.open = tracing_open_generic,
6818 	.read = trace_options_core_read,
6819 	.write = trace_options_core_write,
6820 	.llseek = generic_file_llseek,
6821 };
6822 
6823 struct dentry *trace_create_file(const char *name,
6824 				 umode_t mode,
6825 				 struct dentry *parent,
6826 				 void *data,
6827 				 const struct file_operations *fops)
6828 {
6829 	struct dentry *ret;
6830 
6831 	ret = tracefs_create_file(name, mode, parent, data, fops);
6832 	if (!ret)
6833 		pr_warn("Could not create tracefs '%s' entry\n", name);
6834 
6835 	return ret;
6836 }
6837 
6838 
6839 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6840 {
6841 	struct dentry *d_tracer;
6842 
6843 	if (tr->options)
6844 		return tr->options;
6845 
6846 	d_tracer = tracing_get_dentry(tr);
6847 	if (IS_ERR(d_tracer))
6848 		return NULL;
6849 
6850 	tr->options = tracefs_create_dir("options", d_tracer);
6851 	if (!tr->options) {
6852 		pr_warn("Could not create tracefs directory 'options'\n");
6853 		return NULL;
6854 	}
6855 
6856 	return tr->options;
6857 }
6858 
6859 static void
6860 create_trace_option_file(struct trace_array *tr,
6861 			 struct trace_option_dentry *topt,
6862 			 struct tracer_flags *flags,
6863 			 struct tracer_opt *opt)
6864 {
6865 	struct dentry *t_options;
6866 
6867 	t_options = trace_options_init_dentry(tr);
6868 	if (!t_options)
6869 		return;
6870 
6871 	topt->flags = flags;
6872 	topt->opt = opt;
6873 	topt->tr = tr;
6874 
6875 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6876 				    &trace_options_fops);
6877 
6878 }
6879 
6880 static void
6881 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6882 {
6883 	struct trace_option_dentry *topts;
6884 	struct trace_options *tr_topts;
6885 	struct tracer_flags *flags;
6886 	struct tracer_opt *opts;
6887 	int cnt;
6888 	int i;
6889 
6890 	if (!tracer)
6891 		return;
6892 
6893 	flags = tracer->flags;
6894 
6895 	if (!flags || !flags->opts)
6896 		return;
6897 
6898 	/*
6899 	 * If this is an instance, only create flags for tracers
6900 	 * the instance may have.
6901 	 */
6902 	if (!trace_ok_for_array(tracer, tr))
6903 		return;
6904 
6905 	for (i = 0; i < tr->nr_topts; i++) {
6906 		/* Make sure there are no duplicate flags. */
6907 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6908 			return;
6909 	}
6910 
6911 	opts = flags->opts;
6912 
6913 	for (cnt = 0; opts[cnt].name; cnt++)
6914 		;
6915 
6916 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6917 	if (!topts)
6918 		return;
6919 
6920 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6921 			    GFP_KERNEL);
6922 	if (!tr_topts) {
6923 		kfree(topts);
6924 		return;
6925 	}
6926 
6927 	tr->topts = tr_topts;
6928 	tr->topts[tr->nr_topts].tracer = tracer;
6929 	tr->topts[tr->nr_topts].topts = topts;
6930 	tr->nr_topts++;
6931 
6932 	for (cnt = 0; opts[cnt].name; cnt++) {
6933 		create_trace_option_file(tr, &topts[cnt], flags,
6934 					 &opts[cnt]);
6935 		WARN_ONCE(topts[cnt].entry == NULL,
6936 			  "Failed to create trace option: %s",
6937 			  opts[cnt].name);
6938 	}
6939 }
6940 
6941 static struct dentry *
6942 create_trace_option_core_file(struct trace_array *tr,
6943 			      const char *option, long index)
6944 {
6945 	struct dentry *t_options;
6946 
6947 	t_options = trace_options_init_dentry(tr);
6948 	if (!t_options)
6949 		return NULL;
6950 
6951 	return trace_create_file(option, 0644, t_options,
6952 				 (void *)&tr->trace_flags_index[index],
6953 				 &trace_options_core_fops);
6954 }
6955 
6956 static void create_trace_options_dir(struct trace_array *tr)
6957 {
6958 	struct dentry *t_options;
6959 	bool top_level = tr == &global_trace;
6960 	int i;
6961 
6962 	t_options = trace_options_init_dentry(tr);
6963 	if (!t_options)
6964 		return;
6965 
6966 	for (i = 0; trace_options[i]; i++) {
6967 		if (top_level ||
6968 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6969 			create_trace_option_core_file(tr, trace_options[i], i);
6970 	}
6971 }
6972 
6973 static ssize_t
6974 rb_simple_read(struct file *filp, char __user *ubuf,
6975 	       size_t cnt, loff_t *ppos)
6976 {
6977 	struct trace_array *tr = filp->private_data;
6978 	char buf[64];
6979 	int r;
6980 
6981 	r = tracer_tracing_is_on(tr);
6982 	r = sprintf(buf, "%d\n", r);
6983 
6984 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6985 }
6986 
6987 static ssize_t
6988 rb_simple_write(struct file *filp, const char __user *ubuf,
6989 		size_t cnt, loff_t *ppos)
6990 {
6991 	struct trace_array *tr = filp->private_data;
6992 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
6993 	unsigned long val;
6994 	int ret;
6995 
6996 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6997 	if (ret)
6998 		return ret;
6999 
7000 	if (buffer) {
7001 		mutex_lock(&trace_types_lock);
7002 		if (val) {
7003 			tracer_tracing_on(tr);
7004 			if (tr->current_trace->start)
7005 				tr->current_trace->start(tr);
7006 		} else {
7007 			tracer_tracing_off(tr);
7008 			if (tr->current_trace->stop)
7009 				tr->current_trace->stop(tr);
7010 		}
7011 		mutex_unlock(&trace_types_lock);
7012 	}
7013 
7014 	(*ppos)++;
7015 
7016 	return cnt;
7017 }
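
/*
 * Illustrative use of these handlers (a sketch, assuming they back the
 * per-instance tracing_on file and tracefs is at /sys/kernel/tracing):
 * writing "0" stops recording into the ring buffer without tearing
 * anything down, and writing "1" resumes it.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/tracing_on", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, "0", 1);	// pause recording
 *		// ... read the trace file while nothing new comes in ...
 *		write(fd, "1", 1);	// resume recording
 *		close(fd);
 *		return 0;
 *	}
 */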
7018 
7019 static const struct file_operations rb_simple_fops = {
7020 	.open		= tracing_open_generic_tr,
7021 	.read		= rb_simple_read,
7022 	.write		= rb_simple_write,
7023 	.release	= tracing_release_generic_tr,
7024 	.llseek		= default_llseek,
7025 };
7026 
7027 struct dentry *trace_instance_dir;
7028 
7029 static void
7030 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7031 
7032 static int
7033 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7034 {
7035 	enum ring_buffer_flags rb_flags;
7036 
7037 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7038 
7039 	buf->tr = tr;
7040 
7041 	buf->buffer = ring_buffer_alloc(size, rb_flags);
7042 	if (!buf->buffer)
7043 		return -ENOMEM;
7044 
7045 	buf->data = alloc_percpu(struct trace_array_cpu);
7046 	if (!buf->data) {
7047 		ring_buffer_free(buf->buffer);
7048 		buf->buffer = NULL;
7049 		return -ENOMEM;
7050 	}
7051 
7052 	/* Allocate the first page for all buffers */
7053 	set_buffer_entries(&tr->trace_buffer,
7054 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
7055 
7056 	return 0;
7057 }
7058 
7059 static int allocate_trace_buffers(struct trace_array *tr, int size)
7060 {
7061 	int ret;
7062 
7063 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7064 	if (ret)
7065 		return ret;
7066 
7067 #ifdef CONFIG_TRACER_MAX_TRACE
7068 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
7069 				    allocate_snapshot ? size : 1);
7070 	if (WARN_ON(ret)) {
7071 		ring_buffer_free(tr->trace_buffer.buffer);
7072 		tr->trace_buffer.buffer = NULL;
7073 		free_percpu(tr->trace_buffer.data);
7074 		tr->trace_buffer.data = NULL;
7075 		return -ENOMEM;
7076 	}
7077 	tr->allocated_snapshot = allocate_snapshot;
7078 
7079 	/*
7080 	 * Only the top level trace array gets its snapshot allocated
7081 	 * from the kernel command line.
7082 	 */
7083 	allocate_snapshot = false;
7084 #endif
7085 	return 0;
7086 }
7087 
7088 static void free_trace_buffer(struct trace_buffer *buf)
7089 {
7090 	if (buf->buffer) {
7091 		ring_buffer_free(buf->buffer);
7092 		buf->buffer = NULL;
7093 		free_percpu(buf->data);
7094 		buf->data = NULL;
7095 	}
7096 }
7097 
7098 static void free_trace_buffers(struct trace_array *tr)
7099 {
7100 	if (!tr)
7101 		return;
7102 
7103 	free_trace_buffer(&tr->trace_buffer);
7104 
7105 #ifdef CONFIG_TRACER_MAX_TRACE
7106 	free_trace_buffer(&tr->max_buffer);
7107 #endif
7108 }
7109 
7110 static void init_trace_flags_index(struct trace_array *tr)
7111 {
7112 	int i;
7113 
7114 	/* Used by the trace options files */
7115 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7116 		tr->trace_flags_index[i] = i;
7117 }
7118 
7119 static void __update_tracer_options(struct trace_array *tr)
7120 {
7121 	struct tracer *t;
7122 
7123 	for (t = trace_types; t; t = t->next)
7124 		add_tracer_options(tr, t);
7125 }
7126 
7127 static void update_tracer_options(struct trace_array *tr)
7128 {
7129 	mutex_lock(&trace_types_lock);
7130 	__update_tracer_options(tr);
7131 	mutex_unlock(&trace_types_lock);
7132 }
7133 
7134 static int instance_mkdir(const char *name)
7135 {
7136 	struct trace_array *tr;
7137 	int ret;
7138 
7139 	mutex_lock(&trace_types_lock);
7140 
7141 	ret = -EEXIST;
7142 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7143 		if (tr->name && strcmp(tr->name, name) == 0)
7144 			goto out_unlock;
7145 	}
7146 
7147 	ret = -ENOMEM;
7148 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7149 	if (!tr)
7150 		goto out_unlock;
7151 
7152 	tr->name = kstrdup(name, GFP_KERNEL);
7153 	if (!tr->name)
7154 		goto out_free_tr;
7155 
7156 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7157 		goto out_free_tr;
7158 
7159 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7160 
7161 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7162 
7163 	raw_spin_lock_init(&tr->start_lock);
7164 
7165 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7166 
7167 	tr->current_trace = &nop_trace;
7168 
7169 	INIT_LIST_HEAD(&tr->systems);
7170 	INIT_LIST_HEAD(&tr->events);
7171 
7172 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7173 		goto out_free_tr;
7174 
7175 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
7176 	if (!tr->dir)
7177 		goto out_free_tr;
7178 
7179 	ret = event_trace_add_tracer(tr->dir, tr);
7180 	if (ret) {
7181 		tracefs_remove_recursive(tr->dir);
7182 		goto out_free_tr;
7183 	}
7184 
7185 	init_tracer_tracefs(tr, tr->dir);
7186 	init_trace_flags_index(tr);
7187 	__update_tracer_options(tr);
7188 
7189 	list_add(&tr->list, &ftrace_trace_arrays);
7190 
7191 	mutex_unlock(&trace_types_lock);
7192 
7193 	return 0;
7194 
7195  out_free_tr:
7196 	free_trace_buffers(tr);
7197 	free_cpumask_var(tr->tracing_cpumask);
7198 	kfree(tr->name);
7199 	kfree(tr);
7200 
7201  out_unlock:
7202 	mutex_unlock(&trace_types_lock);
7203 
7204 	return ret;
7205 
7206 }
7207 
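/*
 * Counterpart of instance_mkdir(): invoked when an instance directory
 * is removed. Returns -EBUSY while the instance or its current tracer
 * still holds references (for example, an open trace_pipe reader).
 */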
7208 static int instance_rmdir(const char *name)
7209 {
7210 	struct trace_array *tr;
7211 	int found = 0;
7212 	int ret;
7213 	int i;
7214 
7215 	mutex_lock(&trace_types_lock);
7216 
7217 	ret = -ENODEV;
7218 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7219 		if (tr->name && strcmp(tr->name, name) == 0) {
7220 			found = 1;
7221 			break;
7222 		}
7223 	}
7224 	if (!found)
7225 		goto out_unlock;
7226 
7227 	ret = -EBUSY;
7228 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7229 		goto out_unlock;
7230 
7231 	list_del(&tr->list);
7232 
7233 	/* Disable all the flags that were enabled coming in */
7234 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7235 		if ((1 << i) & ZEROED_TRACE_FLAGS)
7236 			set_tracer_flag(tr, 1 << i, 0);
7237 	}
7238 
7239 	tracing_set_nop(tr);
7240 	event_trace_del_tracer(tr);
7241 	ftrace_clear_pids(tr);
7242 	ftrace_destroy_function_files(tr);
7243 	tracefs_remove_recursive(tr->dir);
7244 	free_trace_buffers(tr);
7245 
7246 	for (i = 0; i < tr->nr_topts; i++) {
7247 		kfree(tr->topts[i].topts);
7248 	}
7249 	kfree(tr->topts);
7250 
7251 	free_cpumask_var(tr->tracing_cpumask);
7252 	kfree(tr->name);
7253 	kfree(tr);
7254 
7255 	ret = 0;
7256 
7257  out_unlock:
7258 	mutex_unlock(&trace_types_lock);
7259 
7260 	return ret;
7261 }
7262 
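/*
 * Create the top-level "instances" directory and register the
 * mkdir/rmdir callbacks above so instances can be created and
 * destroyed from user space.
 */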
7263 static __init void create_trace_instances(struct dentry *d_tracer)
7264 {
7265 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7266 							 instance_mkdir,
7267 							 instance_rmdir);
7268 	if (WARN_ON(!trace_instance_dir))
7269 		return;
7270 }
7271 
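/*
 * Populate a tracer directory (the top level one or an instance) with
 * the standard control files: available_tracers, current_tracer, trace,
 * trace_pipe, buffer_size_kb, trace_marker, trace_clock, tracing_on,
 * the per-CPU directories, and friends.
 */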
7272 static void
7273 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7274 {
7275 	int cpu;
7276 
7277 	trace_create_file("available_tracers", 0444, d_tracer,
7278 			tr, &show_traces_fops);
7279 
7280 	trace_create_file("current_tracer", 0644, d_tracer,
7281 			tr, &set_tracer_fops);
7282 
7283 	trace_create_file("tracing_cpumask", 0644, d_tracer,
7284 			  tr, &tracing_cpumask_fops);
7285 
7286 	trace_create_file("trace_options", 0644, d_tracer,
7287 			  tr, &tracing_iter_fops);
7288 
7289 	trace_create_file("trace", 0644, d_tracer,
7290 			  tr, &tracing_fops);
7291 
7292 	trace_create_file("trace_pipe", 0444, d_tracer,
7293 			  tr, &tracing_pipe_fops);
7294 
7295 	trace_create_file("buffer_size_kb", 0644, d_tracer,
7296 			  tr, &tracing_entries_fops);
7297 
7298 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7299 			  tr, &tracing_total_entries_fops);
7300 
7301 	trace_create_file("free_buffer", 0200, d_tracer,
7302 			  tr, &tracing_free_buffer_fops);
7303 
7304 	trace_create_file("trace_marker", 0220, d_tracer,
7305 			  tr, &tracing_mark_fops);
7306 
7307 	trace_create_file("saved_tgids", 0444, d_tracer,
7308 			  tr, &tracing_saved_tgids_fops);
7309 
7310 	trace_create_file("trace_clock", 0644, d_tracer, tr,
7311 			  &trace_clock_fops);
7312 
7313 	trace_create_file("tracing_on", 0644, d_tracer,
7314 			  tr, &rb_simple_fops);
7315 
7316 	create_trace_options_dir(tr);
7317 
7318 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7319 	trace_create_file("tracing_max_latency", 0644, d_tracer,
7320 			&tr->max_latency, &tracing_max_lat_fops);
7321 #endif
7322 
7323 	if (ftrace_create_function_files(tr, d_tracer))
7324 		WARN(1, "Could not allocate function filter files");
7325 
7326 #ifdef CONFIG_TRACER_SNAPSHOT
7327 	trace_create_file("snapshot", 0644, d_tracer,
7328 			  tr, &snapshot_fops);
7329 #endif
7330 
7331 	for_each_tracing_cpu(cpu)
7332 		tracing_init_tracefs_percpu(tr, cpu);
7333 
7334 	ftrace_init_tracefs(tr, d_tracer);
7335 }
7336 
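/*
 * Automount callback attached to the debugfs "tracing" directory in
 * tracing_init_dentry() below. The user-visible effect, assuming the
 * default mount points, is roughly:
 *
 *   # ls /sys/kernel/debug/tracing
 *   ...first access mounts tracefs there, so legacy paths keep working...
 */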
7337 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7338 {
7339 	struct vfsmount *mnt;
7340 	struct file_system_type *type;
7341 
7342 	/*
7343 	 * To maintain backward compatibility for tools that mount
7344 	 * debugfs to get to the tracing facility, tracefs is automatically
7345 	 * mounted to the debugfs/tracing directory.
7346 	 */
7347 	type = get_fs_type("tracefs");
7348 	if (!type)
7349 		return NULL;
7350 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7351 	put_filesystem(type);
7352 	if (IS_ERR(mnt))
7353 		return NULL;
7354 	mntget(mnt);
7355 
7356 	return mnt;
7357 }
7358 
7359 /**
7360  * tracing_init_dentry - initialize top level trace array
7361  *
7362  * This is called when creating files or directories in the tracing
7363  * directory. It is called from fs_initcall() boot-up code and is
7364  * expected to return the dentry of the top level tracing directory.
7365  */
7366 struct dentry *tracing_init_dentry(void)
7367 {
7368 	struct trace_array *tr = &global_trace;
7369 
7370 	/* The top level trace array uses NULL as its parent */
7371 	if (tr->dir)
7372 		return NULL;
7373 
7374 	if (WARN_ON(!tracefs_initialized()) ||
7375 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
7376 		 WARN_ON(!debugfs_initialized())))
7377 		return ERR_PTR(-ENODEV);
7378 
7379 	/*
7380 	 * As there may still be users that expect the tracing
7381 	 * files to exist in debugfs/tracing, we must automount
7382 	 * the tracefs file system there, so older tools still
7383 	 * work with the newer kernel.
7384 	 */
7385 	tr->dir = debugfs_create_automount("tracing", NULL,
7386 					   trace_automount, NULL);
7387 	if (!tr->dir) {
7388 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
7389 		return ERR_PTR(-ENOMEM);
7390 	}
7391 
7392 	return NULL;
7393 }
7394 
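/*
 * Linker-provided bounds of the section collecting the built-in
 * TRACE_DEFINE_ENUM() maps; trace_enum_init() registers them all
 * at boot.
 */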
7395 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7396 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7397 
7398 static void __init trace_enum_init(void)
7399 {
7400 	int len;
7401 
7402 	len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7403 	trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7404 }
7405 
7406 #ifdef CONFIG_MODULES
7407 static void trace_module_add_enums(struct module *mod)
7408 {
7409 	if (!mod->num_trace_enums)
7410 		return;
7411 
7412 	/*
7413 	 * Modules with bad taint do not have events created; don't
7414 	 * bother with their enums either.
7415 	 */
7416 	if (trace_module_has_bad_taint(mod))
7417 		return;
7418 
7419 	trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7420 }
7421 
7422 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7423 static void trace_module_remove_enums(struct module *mod)
7424 {
7425 	union trace_enum_map_item *map;
7426 	union trace_enum_map_item **last = &trace_enum_maps;
7427 
7428 	if (!mod->num_trace_enums)
7429 		return;
7430 
7431 	mutex_lock(&trace_enum_mutex);
7432 
7433 	map = trace_enum_maps;
7434 
7435 	while (map) {
7436 		if (map->head.mod == mod)
7437 			break;
7438 		map = trace_enum_jmp_to_tail(map);
7439 		last = &map->tail.next;
7440 		map = map->tail.next;
7441 	}
7442 	if (!map)
7443 		goto out;
7444 
7445 	*last = trace_enum_jmp_to_tail(map)->tail.next;
7446 	kfree(map);
7447  out:
7448 	mutex_unlock(&trace_enum_mutex);
7449 }
7450 #else
7451 static inline void trace_module_remove_enums(struct module *mod) { }
7452 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7453 
7454 static int trace_module_notify(struct notifier_block *self,
7455 			       unsigned long val, void *data)
7456 {
7457 	struct module *mod = data;
7458 
7459 	switch (val) {
7460 	case MODULE_STATE_COMING:
7461 		trace_module_add_enums(mod);
7462 		break;
7463 	case MODULE_STATE_GOING:
7464 		trace_module_remove_enums(mod);
7465 		break;
7466 	}
7467 
7468 	return 0;
7469 }
7470 
7471 static struct notifier_block trace_module_nb = {
7472 	.notifier_call = trace_module_notify,
7473 	.priority = 0,
7474 };
7475 #endif /* CONFIG_MODULES */
7476 
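/*
 * fs_initcall (see the bottom of this file): once tracefs is up, create
 * the top level files (README, saved_cmdlines, tracing_thresh, the enum
 * map file, ...), the "instances" directory, and the per-instance files
 * for the global trace array.
 */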
7477 static __init int tracer_init_tracefs(void)
7478 {
7479 	struct dentry *d_tracer;
7480 
7481 	trace_access_lock_init();
7482 
7483 	d_tracer = tracing_init_dentry();
7484 	if (IS_ERR(d_tracer))
7485 		return 0;
7486 
7487 	init_tracer_tracefs(&global_trace, d_tracer);
7488 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7489 
7490 	trace_create_file("tracing_thresh", 0644, d_tracer,
7491 			&global_trace, &tracing_thresh_fops);
7492 
7493 	trace_create_file("README", 0444, d_tracer,
7494 			NULL, &tracing_readme_fops);
7495 
7496 	trace_create_file("saved_cmdlines", 0444, d_tracer,
7497 			NULL, &tracing_saved_cmdlines_fops);
7498 
7499 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7500 			  NULL, &tracing_saved_cmdlines_size_fops);
7501 
7502 	trace_enum_init();
7503 
7504 	trace_create_enum_file(d_tracer);
7505 
7506 #ifdef CONFIG_MODULES
7507 	register_module_notifier(&trace_module_nb);
7508 #endif
7509 
7510 #ifdef CONFIG_DYNAMIC_FTRACE
7511 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7512 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7513 #endif
7514 
7515 	create_trace_instances(d_tracer);
7516 
7517 	update_tracer_options(&global_trace);
7518 
7519 	return 0;
7520 }
7521 
7522 static int trace_panic_handler(struct notifier_block *this,
7523 			       unsigned long event, void *unused)
7524 {
7525 	if (ftrace_dump_on_oops)
7526 		ftrace_dump(ftrace_dump_on_oops);
7527 	return NOTIFY_OK;
7528 }
7529 
7530 static struct notifier_block trace_panic_notifier = {
7531 	.notifier_call  = trace_panic_handler,
7532 	.next           = NULL,
7533 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
7534 };
7535 
7536 static int trace_die_handler(struct notifier_block *self,
7537 			     unsigned long val,
7538 			     void *data)
7539 {
7540 	switch (val) {
7541 	case DIE_OOPS:
7542 		if (ftrace_dump_on_oops)
7543 			ftrace_dump(ftrace_dump_on_oops);
7544 		break;
7545 	default:
7546 		break;
7547 	}
7548 	return NOTIFY_OK;
7549 }
7550 
7551 static struct notifier_block trace_die_notifier = {
7552 	.notifier_call = trace_die_handler,
7553 	.priority = 200
7554 };
7555 
7556 /*
7557  * printk is capped at 1024 bytes; we really don't need it that big.
7558  * Nothing should be printing 1000 characters anyway.
7559  */
7560 #define TRACE_MAX_PRINT		1000
7561 
7562 /*
7563  * Define here KERN_TRACE so that we have one place to modify
7564  * it if we decide to change what log level the ftrace dump
7565  * should be at.
7566  */
7567 #define KERN_TRACE		KERN_EMERG
7568 
7569 void
7570 trace_printk_seq(struct trace_seq *s)
7571 {
7572 	/* Probably should print a warning here. */
7573 	if (s->seq.len >= TRACE_MAX_PRINT)
7574 		s->seq.len = TRACE_MAX_PRINT;
7575 
7576 	/*
7577 	 * More paranoid code. Although the buffer size is set to
7578 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7579 	 * an extra layer of protection.
7580 	 */
7581 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7582 		s->seq.len = s->seq.size - 1;
7583 
7584 	/* should already be NUL terminated, but we are paranoid. */
7585 	s->buffer[s->seq.len] = 0;
7586 
7587 	printk(KERN_TRACE "%s", s->buffer);
7588 
7589 	trace_seq_init(s);
7590 }
7591 
7592 void trace_init_global_iter(struct trace_iterator *iter)
7593 {
7594 	iter->tr = &global_trace;
7595 	iter->trace = iter->tr->current_trace;
7596 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
7597 	iter->trace_buffer = &global_trace.trace_buffer;
7598 
7599 	if (iter->trace && iter->trace->open)
7600 		iter->trace->open(iter);
7601 
7602 	/* Annotate start of buffers if we had overruns */
7603 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
7604 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
7605 
7606 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
7607 	if (trace_clocks[iter->tr->clock_id].in_ns)
7608 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7609 }
7610 
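/*
 * Dump the ftrace ring buffer(s) to the console. Typically reached from
 * the panic/die notifiers above when ftrace_dump_on_oops is set, or via
 * sysrq-z. DUMP_ALL prints every CPU's buffer, DUMP_ORIG only the CPU
 * that triggered the dump.
 */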
7611 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7612 {
7613 	/* use static because iter can be a bit big for the stack */
7614 	static struct trace_iterator iter;
7615 	static atomic_t dump_running;
7616 	struct trace_array *tr = &global_trace;
7617 	unsigned int old_userobj;
7618 	unsigned long flags;
7619 	int cnt = 0, cpu;
7620 
7621 	/* Only allow one dump user at a time. */
7622 	if (atomic_inc_return(&dump_running) != 1) {
7623 		atomic_dec(&dump_running);
7624 		return;
7625 	}
7626 
7627 	/*
7628 	 * Always turn off tracing when we dump.
7629 	 * We don't need to show trace output of what happens
7630 	 * between multiple crashes.
7631 	 *
7632 	 * If the user does a sysrq-z, then they can re-enable
7633 	 * tracing with echo 1 > tracing_on.
7634 	 */
7635 	tracing_off();
7636 
7637 	local_irq_save(flags);
7638 
7639 	/* Simulate the iterator */
7640 	trace_init_global_iter(&iter);
7641 
7642 	for_each_tracing_cpu(cpu) {
7643 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7644 	}
7645 
7646 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7647 
7648 	/* don't look at user memory in panic mode */
7649 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7650 
7651 	switch (oops_dump_mode) {
7652 	case DUMP_ALL:
7653 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
7654 		break;
7655 	case DUMP_ORIG:
7656 		iter.cpu_file = raw_smp_processor_id();
7657 		break;
7658 	case DUMP_NONE:
7659 		goto out_enable;
7660 	default:
7661 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7662 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
7663 	}
7664 
7665 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
7666 
7667 	/* Did function tracer already get disabled? */
7668 	if (ftrace_is_dead()) {
7669 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7670 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7671 	}
7672 
7673 	/*
7674 	 * We need to stop all tracing on all CPUs to read
7675 	 * the next buffer. This is a bit expensive, but it is
7676 	 * not done often. We fill in all that we can read,
7677 	 * and then release the locks again.
7678 	 */
7679 
7680 	while (!trace_empty(&iter)) {
7681 
7682 		if (!cnt)
7683 			printk(KERN_TRACE "---------------------------------\n");
7684 
7685 		cnt++;
7686 
7687 		/* reset all but tr, trace, and overruns */
7688 		memset(&iter.seq, 0,
7689 		       sizeof(struct trace_iterator) -
7690 		       offsetof(struct trace_iterator, seq));
7691 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
7692 		iter.pos = -1;
7693 
7694 		if (trace_find_next_entry_inc(&iter) != NULL) {
7695 			int ret;
7696 
7697 			ret = print_trace_line(&iter);
7698 			if (ret != TRACE_TYPE_NO_CONSUME)
7699 				trace_consume(&iter);
7700 		}
7701 		touch_nmi_watchdog();
7702 
7703 		trace_printk_seq(&iter.seq);
7704 	}
7705 
7706 	if (!cnt)
7707 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
7708 	else
7709 		printk(KERN_TRACE "---------------------------------\n");
7710 
7711  out_enable:
7712 	tr->trace_flags |= old_userobj;
7713 
7714 	for_each_tracing_cpu(cpu) {
7715 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7716 	}
7717 	atomic_dec(&dump_running);
7718 	local_irq_restore(flags);
7719 }
7720 EXPORT_SYMBOL_GPL(ftrace_dump);
7721 
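/*
 * Boot-time allocation of the global trace buffers and related state.
 * Called from trace_init() below, which runs long before tracefs
 * exists; the control files are added later by tracer_init_tracefs()
 * at fs_initcall time.
 */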
7722 __init static int tracer_alloc_buffers(void)
7723 {
7724 	int ring_buf_size;
7725 	int ret = -ENOMEM;
7726 
7727 	/*
7728 	 * Make sure we don't accidentally add more trace options
7729 	 * than we have bits for.
7730 	 */
7731 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7732 
7733 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7734 		goto out;
7735 
7736 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7737 		goto out_free_buffer_mask;
7738 
7739 	/* Only allocate trace_printk buffers if a trace_printk exists */
7740 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7741 		/* Must be called before global_trace.buffer is allocated */
7742 		trace_printk_init_buffers();
7743 
7744 	/* To save memory, keep the ring buffer size to its minimum */
7745 	if (ring_buffer_expanded)
7746 		ring_buf_size = trace_buf_size;
7747 	else
7748 		ring_buf_size = 1;
7749 
7750 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7751 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7752 
7753 	raw_spin_lock_init(&global_trace.start_lock);
7754 
7755 	/* Used for event triggers */
7756 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7757 	if (!temp_buffer)
7758 		goto out_free_cpumask;
7759 
7760 	if (trace_create_savedcmd() < 0)
7761 		goto out_free_temp_buffer;
7762 
7763 	/* TODO: make the number of buffers hot pluggable with CPUs */
7764 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7765 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7766 		WARN_ON(1);
7767 		goto out_free_savedcmd;
7768 	}
7769 
7770 	if (global_trace.buffer_disabled)
7771 		tracing_off();
7772 
7773 	if (trace_boot_clock) {
7774 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
7775 		if (ret < 0)
7776 			pr_warn("Trace clock %s not defined, going back to default\n",
7777 				trace_boot_clock);
7778 	}
7779 
7780 	/*
7781 	 * register_tracer() might reference current_trace, so it
7782 	 * needs to be set before we register anything. This is
7783 	 * just a bootstrap of current_trace anyway.
7784 	 */
7785 	global_trace.current_trace = &nop_trace;
7786 
7787 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7788 
7789 	ftrace_init_global_array_ops(&global_trace);
7790 
7791 	init_trace_flags_index(&global_trace);
7792 
7793 	register_tracer(&nop_trace);
7794 
7795 	/* All seems OK, enable tracing */
7796 	tracing_disabled = 0;
7797 
7798 	atomic_notifier_chain_register(&panic_notifier_list,
7799 				       &trace_panic_notifier);
7800 
7801 	register_die_notifier(&trace_die_notifier);
7802 
7803 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7804 
7805 	INIT_LIST_HEAD(&global_trace.systems);
7806 	INIT_LIST_HEAD(&global_trace.events);
7807 	list_add(&global_trace.list, &ftrace_trace_arrays);
7808 
7809 	apply_trace_boot_options();
7810 
7811 	register_snapshot_cmd();
7812 
7813 	return 0;
7814 
7815 out_free_savedcmd:
7816 	free_saved_cmdlines_buffer(savedcmd);
7817 out_free_temp_buffer:
7818 	ring_buffer_free(temp_buffer);
7819 out_free_cpumask:
7820 	free_cpumask_var(global_trace.tracing_cpumask);
7821 out_free_buffer_mask:
7822 	free_cpumask_var(tracing_buffer_mask);
7823 out:
7824 	return ret;
7825 }
7826 
7827 void __init trace_init(void)
7828 {
7829 	if (tracepoint_printk) {
7830 		tracepoint_print_iter =
7831 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7832 		if (WARN_ON(!tracepoint_print_iter))
7833 			tracepoint_printk = 0;
7834 	}
7835 	tracer_alloc_buffers();
7836 	trace_event_init();
7837 }
7838 
7839 __init static int clear_boot_tracer(void)
7840 {
7841 	/*
7842 	 * The default bootup tracer name is stored in an init section
7843 	 * that is freed after boot. This function runs as a late initcall;
7844 	 * if the boot tracer was never registered, clear the pointer so a
7845 	 * later registration does not access memory that is about to be
7846 	 * freed.
7847 	 */
7848 	if (!default_bootup_tracer)
7849 		return 0;
7850 
7851 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7852 	       default_bootup_tracer);
7853 	default_bootup_tracer = NULL;
7854 
7855 	return 0;
7856 }
7857 
7858 fs_initcall(tracer_init_tracefs);
7859 late_initcall_sync(clear_boot_tracer);
7860